From dc5a9ef497147a274a922ec4a8a1dd1d42b23fe9 Mon Sep 17 00:00:00 2001
From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com>
Date: Fri, 14 Oct 2022 18:15:49 -0400
Subject: [PATCH 01/76] link_to deprecation coverage

Futureproof for link_to deprecation.
https://bugs.python.org/issue39950
---
 bdfr/downloader.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/bdfr/downloader.py b/bdfr/downloader.py
index 3b5a7e1..4f508fa 100644
--- a/bdfr/downloader.py
+++ b/bdfr/downloader.py
@@ -114,7 +114,10 @@ class RedditDownloader(RedditConnector):
                         f'Resource hash {resource_hash} from submission {submission.id} downloaded elsewhere')
                     return
                 elif self.args.make_hard_links:
-                    self.master_hash_list[resource_hash].link_to(destination)
+                    try:
+                        destination.hardlink_to(self.master_hash_list[resource_hash])
+                    except:
+                        self.master_hash_list[resource_hash].link_to(destination)
                     logger.info(
                         f'Hard link made linking {destination} to {self.master_hash_list[resource_hash]}'
                         f' in submission {submission.id}')
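Note: a standalone sketch of the compatibility shim this patch applies, outside the BDFR codebase (the helper and variable names here are illustrative, not BDFR's API):

```python
from pathlib import Path

def make_hard_link(existing: Path, new: Path) -> None:
    """Create `new` as a hard link to the file at `existing`, on Python 3.9+."""
    try:
        # Path.hardlink_to() was added in Python 3.10; its argument is the
        # file that the new link should point to.
        new.hardlink_to(existing)
    except AttributeError:
        # Python 3.9 only has Path.link_to(), deprecated in 3.10 and removed
        # in 3.12, with the reversed calling convention: the existing file is
        # the receiver and the new link is the argument.
        existing.link_to(new)
```

On 3.10+ only `hardlink_to()` runs, so the code survives the removal of `link_to()` in 3.12; on 3.9 the `AttributeError` branch falls back to the deprecated method, which is why a later patch in this series narrows the bare `except` to `AttributeError`.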
From 0e90a2e900c5c9a6d2d5ec1cb64c33883deac6e7 Mon Sep 17 00:00:00 2001
From: Soulsuck24 <79275800+Soulsuck24@users.noreply.github.com>
Date: Mon, 24 Oct 2022 12:45:26 -0400
Subject: [PATCH 02/76] Switch Redgifs to temporary tokens

Initial switch to temporary tokens for Redgifs. Gets a new auth token for
every API request.
---
 bdfr/site_downloaders/redgifs.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/bdfr/site_downloaders/redgifs.py b/bdfr/site_downloaders/redgifs.py
index 615c21a..bbaef12 100644
--- a/bdfr/site_downloaders/redgifs.py
+++ b/bdfr/site_downloaders/redgifs.py
@@ -28,7 +28,12 @@ class Redgifs(BaseDownloader):
         except AttributeError:
             raise SiteDownloaderError(f'Could not extract Redgifs ID from {url}')
 
-        content = Redgifs.retrieve_url(f'https://api.redgifs.com/v1/gifs/{redgif_id}')
+        auth_token = json.loads(Redgifs.retrieve_url('https://api.redgifs.com/v2/auth/temporary').text)['token']
+        headers = {
+            'Authorization': f'Bearer {auth_token}',
+        }
+
+        content = Redgifs.retrieve_url(f'https://api.redgifs.com/v2/gifs/{redgif_id}', headers=headers)
 
         if content is None:
             raise SiteDownloaderError('Could not read the page source')
@@ -40,16 +45,16 @@ class Redgifs(BaseDownloader):
         out = set()
         try:
-            if response_json['gfyItem']['type'] == 1:  # type 1 is a video
-                out.add(response_json['gfyItem']['mp4Url'])
-            elif response_json['gfyItem']['type'] == 2:  # type 2 is an image
-                if 'gallery' in response_json['gfyItem']:
+            if response_json['gif']['type'] == 1:  # type 1 is a video
+                out.add(response_json['gif']['urls']['hd'])
+            elif response_json['gif']['type'] == 2:  # type 2 is an image
+                if response_json['gif']['gallery']:
                     content = Redgifs.retrieve_url(
-                        f'https://api.redgifs.com/v2/gallery/{response_json["gfyItem"]["gallery"]}')
+                        f'https://api.redgifs.com/v2/gallery/{response_json["gif"]["gallery"]}')
                     response_json = json.loads(content.text)
                     out = {p['urls']['hd'] for p in response_json['gifs']}
                 else:
-                    out.add(response_json['gfyItem']['content_urls']['large']['url'])
+                    out.add(response_json['gif']['urls']['hd'])
             else:
                 raise KeyError
         except (KeyError, AttributeError):
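Note: a minimal sketch of the temporary-token handshake the patch adopts, using `requests` directly rather than BDFR's `retrieve_url` helper (the endpoint URLs are taken from the diff above):

```python
import requests

def redgifs_api_json(redgif_id: str) -> dict:
    # The v2 content API rejects anonymous requests, so a fresh short-lived
    # token is minted first and sent as a bearer token.
    token = requests.get("https://api.redgifs.com/v2/auth/temporary").json()["token"]
    headers = {"Authorization": f"Bearer {token}"}
    response = requests.get(f"https://api.redgifs.com/v2/gifs/{redgif_id}", headers=headers)
    response.raise_for_status()
    return response.json()
```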
From dfc21295e3f1712a163a637d1cf524ba835cb3f6 Mon Sep 17 00:00:00 2001
From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com>
Date: Sat, 5 Nov 2022 10:51:33 -0400
Subject: [PATCH 03/76] Add Delay for Reddit support

Adds support for delayforreddit.com non-direct links.
---
 bdfr/site_downloaders/delay_for_reddit.py     | 21 ++++++++++++++++
 bdfr/site_downloaders/download_factory.py     |  3 +++
 .../site_downloaders/test_delay_for_reddit.py | 25 +++++++++++++++++++
 3 files changed, 49 insertions(+)
 create mode 100644 bdfr/site_downloaders/delay_for_reddit.py
 create mode 100644 tests/site_downloaders/test_delay_for_reddit.py

diff --git a/bdfr/site_downloaders/delay_for_reddit.py b/bdfr/site_downloaders/delay_for_reddit.py
new file mode 100644
index 0000000..149e403
--- /dev/null
+++ b/bdfr/site_downloaders/delay_for_reddit.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+
+import logging
+from typing import Optional
+
+from praw.models import Submission
+
+from bdfr.site_authenticator import SiteAuthenticator
+from bdfr.resource import Resource
+from bdfr.site_downloaders.base_downloader import BaseDownloader
+
+logger = logging.getLogger(__name__)
+
+
+class DelayForReddit(BaseDownloader):
+    def __init__(self, post: Submission):
+        super().__init__(post)
+
+    def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
+        media = DelayForReddit.retrieve_url(self.post.url)
+        return [Resource(self.post, media.url, Resource.retry_download(media.url))]
diff --git a/bdfr/site_downloaders/download_factory.py b/bdfr/site_downloaders/download_factory.py
index f5e8d99..75beeae 100644
--- a/bdfr/site_downloaders/download_factory.py
+++ b/bdfr/site_downloaders/download_factory.py
@@ -7,6 +7,7 @@ from typing import Type
 
 from bdfr.exceptions import NotADownloadableLinkError
 from bdfr.site_downloaders.base_downloader import BaseDownloader
+from bdfr.site_downloaders.delay_for_reddit import DelayForReddit
 from bdfr.site_downloaders.direct import Direct
 from bdfr.site_downloaders.erome import Erome
 from bdfr.site_downloaders.fallback_downloaders.ytdlp_fallback import YtdlpFallback
@@ -34,6 +35,8 @@ class DownloadFactory:
             return Direct
         elif re.match(r'erome\.com.*', sanitised_url):
             return Erome
+        elif re.match(r'delayforreddit\.com', sanitised_url):
+            return DelayForReddit
         elif re.match(r'reddit\.com/gallery/.*', sanitised_url):
             return Gallery
         elif re.match(r'patreon\.com.*', sanitised_url):
diff --git a/tests/site_downloaders/test_delay_for_reddit.py b/tests/site_downloaders/test_delay_for_reddit.py
new file mode 100644
index 0000000..5e0e1c8
--- /dev/null
+++ b/tests/site_downloaders/test_delay_for_reddit.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+# coding=utf-8
+
+from unittest.mock import Mock
+
+import pytest
+
+from bdfr.resource import Resource
+from bdfr.site_downloaders.delay_for_reddit import DelayForReddit
+
+
+@pytest.mark.online
+@pytest.mark.parametrize(('test_url', 'expected_hash'), (
+    ('https://www.delayforreddit.com/dfr/calvin6123/MjU1Njc5NQ==', '3300f28c2f9358d05667985c9c04210d'),
+    ('https://www.delayforreddit.com/dfr/RoXs_26/NDAwMzAyOQ==', '09b7b01719dff45ab197bdc08b90f78a'),
+))
+def test_download_resource(test_url: str, expected_hash: str):
+    mock_submission = Mock()
+    mock_submission.url = test_url
+    test_site = DelayForReddit(mock_submission)
+    resources = test_site.find_resources()
+    assert len(resources) == 1
+    assert isinstance(resources[0], Resource)
+    resources[0].download()
+    assert resources[0].hash.hexdigest() == expected_hash
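Note: the one-line `find_resources` body works because `requests` follows HTTP redirects by default and records the final address. A hedged sketch of that mechanism (the page URL below is made up for illustration):

```python
import requests

# Hypothetical Delay for Reddit link; the real pages redirect to the media file.
page_url = "https://www.delayforreddit.com/dfr/someuser/MTIzNDU2"

response = requests.get(page_url)
# response.url holds the URL after all redirects have been followed, i.e. the
# direct link to the media, which is what the downloader stores as media.url.
print(response.url)
```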
From 3c7f85725eecff1cfc00458646311ec01c31b50a Mon Sep 17 00:00:00 2001
From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com>
Date: Tue, 8 Nov 2022 12:06:20 -0500
Subject: [PATCH 04/76] Narrow except

Narrow except to AttributeError
---
 bdfr/downloader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bdfr/downloader.py b/bdfr/downloader.py
index 4f508fa..7709add 100644
--- a/bdfr/downloader.py
+++ b/bdfr/downloader.py
@@ -116,7 +116,7 @@ class RedditDownloader(RedditConnector):
                 elif self.args.make_hard_links:
                     try:
                         destination.hardlink_to(self.master_hash_list[resource_hash])
-                    except:
+                    except AttributeError:
                         self.master_hash_list[resource_hash].link_to(destination)
                     logger.info(
                         f'Hard link made linking {destination} to {self.master_hash_list[resource_hash]}'
From 77711c243a07a773bd2817c76dd3903a087b8035 Mon Sep 17 00:00:00 2001
From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com>
Date: Tue, 8 Nov 2022 13:54:18 -0500
Subject: [PATCH 05/76] Fix test for deleted user on post

The tested post now shows the user as deleted, causing tests to fail.
Updated to a working post.
---
 tests/test_file_name_formatter.py | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/tests/test_file_name_formatter.py b/tests/test_file_name_formatter.py
index 21cb8a6..e7f1ebe 100644
--- a/tests/test_file_name_formatter.py
+++ b/tests/test_file_name_formatter.py
@@ -51,7 +51,7 @@ def do_test_path_equality(result: Path, expected: str) -> bool:
 
 @pytest.fixture(scope='session')
 def reddit_submission(reddit_instance: praw.Reddit) -> praw.models.Submission:
-    return reddit_instance.submission(id='lgilgt')
+    return reddit_instance.submission(id='w22m5l')
 
 
 @pytest.mark.parametrize(('test_format_string', 'expected'), (
@@ -86,12 +86,12 @@ def test_check_format_string_validity(test_string: str, expected: bool):
 @pytest.mark.online
 @pytest.mark.reddit
 @pytest.mark.parametrize(('test_format_string', 'expected'), (
-    ('{SUBREDDIT}', 'Mindustry'),
-    ('{REDDITOR}', 'Gamer_player_boi'),
-    ('{POSTID}', 'lgilgt'),
-    ('{FLAIR}', 'Art'),
-    ('{SUBREDDIT}_{TITLE}', 'Mindustry_Toxopid that is NOT humane >:('),
-    ('{REDDITOR}_{TITLE}_{POSTID}', 'Gamer_player_boi_Toxopid that is NOT humane >:(_lgilgt')
+    ('{SUBREDDIT}', 'formula1'),
+    ('{REDDITOR}', 'Kirsty-Blue'),
+    ('{POSTID}', 'w22m5l'),
+    ('{FLAIR}', 'Social Media rall'),
+    ('{SUBREDDIT}_{TITLE}', 'formula1_George Russel acknowledges the Twitter trend about him'),
+    ('{REDDITOR}_{TITLE}_{POSTID}', 'Kirsty-Blue_George Russel acknowledges the Twitter trend about him_w22m5l')
 ))
 def test_format_name_real(test_format_string: str, expected: str, reddit_submission: praw.models.Submission):
     test_formatter = FileNameFormatter(test_format_string, '', '')
@@ -105,17 +105,17 @@
     (
         '{SUBREDDIT}',
         '{POSTID}',
-        'test/Mindustry/lgilgt.png',
+        'test/formula1/w22m5l.png',
     ),
     (
         '{SUBREDDIT}',
         '{TITLE}_{POSTID}',
-        'test/Mindustry/Toxopid that is NOT humane >:(_lgilgt.png',
+        'test/formula1/George Russel acknowledges the Twitter trend about him_w22m5l.png',
     ),
     (
         '{SUBREDDIT}',
         '{REDDITOR}_{TITLE}_{POSTID}',
-        'test/Mindustry/Gamer_player_boi_Toxopid that is NOT humane >:(_lgilgt.png',
+        'test/formula1/Kirsty-Blue_George Russel acknowledges the Twitter trend about him_w22m5l.png',
     ),
 ))
 def test_format_full(
@@ -148,10 +148,10 @@ def test_format_full_conform(
 @pytest.mark.online
 @pytest.mark.reddit
 @pytest.mark.parametrize(('format_string_directory', 'format_string_file', 'index', 'expected'), (
-    ('{SUBREDDIT}', '{POSTID}', None, 'test/Mindustry/lgilgt.png'),
-    ('{SUBREDDIT}', '{POSTID}', 1, 'test/Mindustry/lgilgt_1.png'),
-    ('{SUBREDDIT}', '{POSTID}', 2, 'test/Mindustry/lgilgt_2.png'),
-    ('{SUBREDDIT}', '{TITLE}_{POSTID}', 2, 'test/Mindustry/Toxopid that is NOT humane >:(_lgilgt_2.png'),
+    ('{SUBREDDIT}', '{POSTID}', None, 'test/formula1/w22m5l.png'),
+    ('{SUBREDDIT}', '{POSTID}', 1, 'test/formula1/w22m5l_1.png'),
+    ('{SUBREDDIT}', '{POSTID}', 2, 'test/formula1/w22m5l_2.png'),
+    ('{SUBREDDIT}', '{TITLE}_{POSTID}', 2, 'test/formula1/George Russel acknowledges the Twitter trend about him_w22m5l_2.png'),
 ))
 def test_format_full_with_index_suffix(
     format_string_directory: str,
From f3c7d796aa764522567eb251d719555329b61a83 Mon Sep 17 00:00:00 2001
From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com>
Date: Tue, 8 Nov 2022 15:37:21 -0500
Subject: [PATCH 06/76] Update for other failing tests

Seems there was some overlap in test names that was contributing to the
test errors. Updated hash and test name.
---
 tests/site_downloaders/test_pornhub.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/site_downloaders/test_pornhub.py b/tests/site_downloaders/test_pornhub.py
index cbe3662..e0933b0 100644
--- a/tests/site_downloaders/test_pornhub.py
+++ b/tests/site_downloaders/test_pornhub.py
@@ -13,10 +13,9 @@ from bdfr.site_downloaders.pornhub import PornHub
 @pytest.mark.online
 @pytest.mark.slow
 @pytest.mark.parametrize(('test_url', 'expected_hash'), (
-    ('https://www.pornhub.com/view_video.php?viewkey=ph6074c59798497', 'd9b99e4ebecf2d8d67efe5e70d2acf8a'),
-    ('https://www.pornhub.com/view_video.php?viewkey=ph5ede121f0d3f8', ''),
+    ('https://www.pornhub.com/view_video.php?viewkey=ph6074c59798497', 'ad52a0f4fce8f99df0abed17de1d04c7'),
 ))
-def test_find_resources_good(test_url: str, expected_hash: str):
+def test_hash_resources_good(test_url: str, expected_hash: str):
     test_submission = MagicMock()
     test_submission.url = test_url
     downloader = PornHub(test_submission)
From 5341d6f12caa4cc8deebff1cb849ac5b0fa0c18e Mon Sep 17 00:00:00 2001
From: Serene-Arc
Date: Sun, 20 Nov 2022 18:54:56 +1000
Subject: [PATCH 07/76] Add catch for per-submission praw errors

---
 bdfr/archiver.py   | 26 +++++++++++++++-----------
 bdfr/downloader.py |  6 +++++-
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/bdfr/archiver.py b/bdfr/archiver.py
index 214111f..4bd24f5 100644
--- a/bdfr/archiver.py
+++ b/bdfr/archiver.py
@@ -8,6 +8,7 @@ from typing import Iterator
 
 import dict2xml
 import praw.models
+import prawcore
 import yaml
 
 from bdfr.archive_entry.base_archive_entry import BaseArchiveEntry
@@ -28,17 +29,20 @@ class Archiver(RedditConnector):
     def download(self):
         for generator in self.reddit_lists:
             for submission in generator:
-                if (submission.author and submission.author.name in self.args.ignore_user) or \
-                        (submission.author is None and 'DELETED' in self.args.ignore_user):
-                    logger.debug(
-                        f'Submission {submission.id} in {submission.subreddit.display_name} skipped'
-                        f' due to {submission.author.name if submission.author else "DELETED"} being an ignored user')
-                    continue
-                if submission.id in self.excluded_submission_ids:
-                    logger.debug(f'Object {submission.id} in exclusion list, skipping')
-                    continue
-                logger.debug(f'Attempting to archive submission {submission.id}')
-                self.write_entry(submission)
+                try:
+                    if (submission.author and submission.author.name in self.args.ignore_user) or \
+                            (submission.author is None and 'DELETED' in self.args.ignore_user):
+                        logger.debug(
+                            f'Submission {submission.id} in {submission.subreddit.display_name} skipped'
+                            f' due to {submission.author.name if submission.author else "DELETED"} being an ignored user')
+                        continue
+                    if submission.id in self.excluded_submission_ids:
+                        logger.debug(f'Object {submission.id} in exclusion list, skipping')
+                        continue
+                    logger.debug(f'Attempting to archive submission {submission.id}')
+                    self.write_entry(submission)
+                except prawcore.PrawcoreException as e:
+                    logger.error(f'Submission {submission.id} failed to be archived due to a PRAW exception: {e}')
 
     def get_submissions_from_link(self) -> list[list[praw.models.Submission]]:
         supplied_submissions = []
diff --git a/bdfr/downloader.py b/bdfr/downloader.py
index 7709add..057f6af 100644
--- a/bdfr/downloader.py
+++ b/bdfr/downloader.py
@@ -12,6 +12,7 @@ from pathlib import Path
 import praw
 import praw.exceptions
 import praw.models
+import prawcore
 
 from bdfr import exceptions as errors
 from bdfr.configuration import Configuration
@@ -42,7 +43,10 @@ class RedditDownloader(RedditConnector):
     def download(self):
         for generator in self.reddit_lists:
             for submission in generator:
-                self._download_submission(submission)
+                try:
+                    self._download_submission(submission)
+                except prawcore.PrawcoreException as e:
+                    logger.error(f'Submission {submission.id} failed to download due to a PRAW exception: {e}')
 
     def _download_submission(self, submission: praw.models.Submission):
         if submission.id in self.excluded_submission_ids:
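Note: the same wrap-and-log pattern, reduced to a sketch outside BDFR (`handle` stands in for any per-submission action):

```python
import logging

import prawcore

logger = logging.getLogger(__name__)

def process_all(submissions, handle) -> None:
    # prawcore.PrawcoreException is the base class of prawcore's request-level
    # errors (Forbidden, NotFound, ServerError, ...), so one handler covers
    # deleted users, banned subreddits, and similar failures without letting
    # a single bad submission abort the whole run.
    for submission in submissions:
        try:
            handle(submission)
        except prawcore.PrawcoreException as e:
            logger.error(f'Submission {submission.id} failed: {e}')
```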
From 1385545e26a745b9fae422a5bd75af499d131e9b Mon Sep 17 00:00:00 2001
From: Serene-Arc
Date: Mon, 21 Nov 2022 14:35:57 +1000
Subject: [PATCH 08/76] Add tests for downloader

---
 tests/integration_tests/test_download_integration.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/integration_tests/test_download_integration.py b/tests/integration_tests/test_download_integration.py
index a9f0e0e..44f62c2 100644
--- a/tests/integration_tests/test_download_integration.py
+++ b/tests/integration_tests/test_download_integration.py
@@ -232,6 +232,8 @@ def test_cli_download_long(test_args: list[str], tmp_path: Path):
     ['--subreddit', 'donaldtrump', '-L', 10],  # Banned subreddit
     ['--user', 'djnish', '--user', 'helen_darten', '-m', 'cuteanimalpics', '-L', 10],
     ['--subreddit', 'friends', '-L', 10],
+    ['-l', 'ijy4ch'],
+    ['-l', 'kw4wjm'],
 ))
 def test_cli_download_soft_fail(test_args: list[str], tmp_path: Path):
     runner = CliRunner()

From 4143c53ff13b0c6c376c21696548c0d52e69cfe3 Mon Sep 17 00:00:00 2001
From: Serene-Arc
Date: Mon, 21 Nov 2022 15:47:13 +1000
Subject: [PATCH 09/76] Add tests

---
 tests/integration_tests/test_archive_integration.py  | 2 ++
 tests/integration_tests/test_download_integration.py | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/integration_tests/test_archive_integration.py b/tests/integration_tests/test_archive_integration.py
index 7b9a48d..3380820 100644
--- a/tests/integration_tests/test_archive_integration.py
+++ b/tests/integration_tests/test_archive_integration.py
@@ -36,6 +36,8 @@ def create_basic_args_for_archive_runner(test_args: list[str], run_path: Path):
     ['-l', 'gstd4hk'],
     ['-l', 'm2601g', '-f', 'yaml'],
     ['-l', 'n60t4c', '-f', 'xml'],
+    ['-l', 'ijy4ch'],  # user deleted post
+    ['-l', 'kw4wjm'],  # post from banned subreddit
 ))
 def test_cli_archive_single(test_args: list[str], tmp_path: Path):
     runner = CliRunner()
diff --git a/tests/integration_tests/test_download_integration.py b/tests/integration_tests/test_download_integration.py
index 44f62c2..83f972d 100644
--- a/tests/integration_tests/test_download_integration.py
+++ b/tests/integration_tests/test_download_integration.py
@@ -232,8 +232,8 @@ def test_cli_download_long(test_args: list[str], tmp_path: Path):
     ['--subreddit', 'donaldtrump', '-L', 10],  # Banned subreddit
     ['--user', 'djnish', '--user', 'helen_darten', '-m', 'cuteanimalpics', '-L', 10],
     ['--subreddit', 'friends', '-L', 10],
-    ['-l', 'ijy4ch'],
-    ['-l', 'kw4wjm'],
+    ['-l', 'ijy4ch'],  # user deleted post
+    ['-l', 'kw4wjm'],  # post from banned subreddit
 ))
 def test_cli_download_soft_fail(test_args: list[str], tmp_path: Path):
     runner = CliRunner()
From 42416db8b9dd3bdd965fa76b04122e651f458271 Mon Sep 17 00:00:00 2001
From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com>
Date: Mon, 21 Nov 2022 21:37:59 -0500
Subject: [PATCH 10/76] Fix PRAW deprecations

Fix deprecations in MultiredditHelper and CommentForest.
---
 bdfr/archive_entry/base_archive_entry.py       |  2 +-
 bdfr/archive_entry/submission_archive_entry.py |  2 +-
 bdfr/connector.py                              |  2 +-
 .../test_archive_integration.py                | 17 +++++++++++++++--
 tests/test_connector.py                        |  2 +-
 5 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/bdfr/archive_entry/base_archive_entry.py b/bdfr/archive_entry/base_archive_entry.py
index 516e5d0..a33381e 100644
--- a/bdfr/archive_entry/base_archive_entry.py
+++ b/bdfr/archive_entry/base_archive_entry.py
@@ -32,7 +32,7 @@ class BaseArchiveEntry(ABC):
             'parent_id': in_comment.parent_id,
             'replies': [],
         }
-        in_comment.replies.replace_more(0)
+        in_comment.replies.replace_more(limit=None)
         for reply in in_comment.replies:
             out_dict['replies'].append(BaseArchiveEntry._convert_comment_to_dict(reply))
         return out_dict
diff --git a/bdfr/archive_entry/submission_archive_entry.py b/bdfr/archive_entry/submission_archive_entry.py
index 538aea8..c124e0f 100644
--- a/bdfr/archive_entry/submission_archive_entry.py
+++ b/bdfr/archive_entry/submission_archive_entry.py
@@ -45,7 +45,7 @@ class SubmissionArchiveEntry(BaseArchiveEntry):
     def _get_comments(self) -> list[dict]:
         logger.debug(f'Retrieving full comment tree for submission {self.source.id}')
         comments = []
-        self.source.comments.replace_more(0)
+        self.source.comments.replace_more(limit=None)
         for top_level_comment in self.source.comments:
             comments.append(self._convert_comment_to_dict(top_level_comment))
         return comments
diff --git a/bdfr/connector.py b/bdfr/connector.py
index 61ed8f4..9aa2a6e 100644
--- a/bdfr/connector.py
+++ b/bdfr/connector.py
@@ -323,7 +323,7 @@ class RedditConnector(metaclass=ABCMeta):
         out = []
         for multi in self.split_args_input(self.args.multireddit):
             try:
-                multi = self.reddit_instance.multireddit(self.args.user[0], multi)
+                multi = self.reddit_instance.multireddit(redditor=self.args.user[0], name=multi)
                 if not multi.subreddits:
                     raise errors.BulkDownloaderException
                 out.append(self.create_filtered_listing_generator(multi))
diff --git a/tests/integration_tests/test_archive_integration.py b/tests/integration_tests/test_archive_integration.py
index 3380820..caf6fcb 100644
--- a/tests/integration_tests/test_archive_integration.py
+++ b/tests/integration_tests/test_archive_integration.py
@@ -36,8 +36,6 @@ def create_basic_args_for_archive_runner(test_args: list[str], run_path: Path):
     ['-l', 'gstd4hk'],
     ['-l', 'm2601g', '-f', 'yaml'],
     ['-l', 'n60t4c', '-f', 'xml'],
-    ['-l', 'ijy4ch'],  # user deleted post
-    ['-l', 'kw4wjm'],  # post from banned subreddit
 ))
 def test_cli_archive_single(test_args: list[str], tmp_path: Path):
     runner = CliRunner()
@@ -153,3 +151,18 @@ def test_cli_archive_links_exclusion(test_args: list[str], tmp_path: Path):
     assert result.exit_code == 0
     assert 'in exclusion list' in result.output
     assert 'Attempting to archive' not in result.output
+
+@pytest.mark.online
+@pytest.mark.reddit
+@pytest.mark.skipif(not does_test_config_exist,
+                    reason='A test config file is required for integration tests')
+@pytest.mark.parametrize('test_args', (
+    ['-l', 'ijy4ch'],  # user deleted post
+    ['-l', 'kw4wjm'],  # post from banned subreddit
+))
+def test_cli_archive_soft_fail(test_args: list[str], tmp_path: Path):
+    runner = CliRunner()
+    test_args = create_basic_args_for_archive_runner(test_args, tmp_path)
+    result = runner.invoke(cli, test_args)
+    assert result.exit_code == 0
+    assert 'failed to be archived due to a PRAW exception' in result.output
+    assert 'Attempting to archive' not in result.output
diff --git a/tests/test_connector.py b/tests/test_connector.py
index 142baa6..2cddcdf 100644
--- a/tests/test_connector.py
+++ b/tests/test_connector.py
@@ -286,7 +286,7 @@ def test_get_multireddits_public(
     downloader_mock.create_filtered_listing_generator.return_value = \
         RedditConnector.create_filtered_listing_generator(
             downloader_mock,
-            reddit_instance.multireddit(test_user, test_multireddits[0]),
+            reddit_instance.multireddit(redditor=test_user, name=test_multireddits[0]),
         )
     results = RedditConnector.get_multireddits(downloader_mock)
     results = [sub for res in results for sub in res]
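Note: a sketch of the two calls in their keyword form, assuming a configured `praw.ini` site named `bot` (`some_user` and `some_multi` are placeholders):

```python
import praw

reddit = praw.Reddit("bot")

# PRAW 7 deprecated positional arguments for calls like this one; newer
# releases expect keyword arguments.
multi = reddit.multireddit(redditor="some_user", name="some_multi")

submission = reddit.submission(id="w22m5l")
# limit=None keeps fetching until every MoreComments stub is expanded,
# whereas limit=0 would only drop the stubs without fetching anything.
submission.comments.replace_more(limit=None)
```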
From 87104e7e6a0a244c7cad60f85df231f775071323 Mon Sep 17 00:00:00 2001
From: Serene-Arc
Date: Thu, 24 Nov 2022 10:48:17 +1000
Subject: [PATCH 11/76] Catch exceptions in cloner

---
 bdfr/cloner.py                     |  9 +++++++--
 .../test_clone_integration.py      | 18 ++++++++++++++++++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/bdfr/cloner.py b/bdfr/cloner.py
index 979f50f..47e03f8 100644
--- a/bdfr/cloner.py
+++ b/bdfr/cloner.py
@@ -3,6 +3,8 @@
 
 import logging
 
+import prawcore
+
 from bdfr.archiver import Archiver
 from bdfr.configuration import Configuration
 from bdfr.downloader import RedditDownloader
@@ -17,5 +19,8 @@ class RedditCloner(RedditDownloader, Archiver):
     def download(self):
         for generator in self.reddit_lists:
             for submission in generator:
-                self._download_submission(submission)
-                self.write_entry(submission)
+                try:
+                    self._download_submission(submission)
+                    self.write_entry(submission)
+                except prawcore.PrawcoreException as e:
+                    logger.error(f'Submission {submission.id} failed to be cloned due to a PRAW exception: {e}')
diff --git a/tests/integration_tests/test_clone_integration.py b/tests/integration_tests/test_clone_integration.py
index f9bf91a..288793b 100644
--- a/tests/integration_tests/test_clone_integration.py
+++ b/tests/integration_tests/test_clone_integration.py
@@ -36,6 +36,8 @@ def create_basic_args_for_cloner_runner(test_args: list[str], tmp_path: Path):
     ['-s', 'TrollXChromosomes/', '-L', 1],
     ['-l', 'eiajjw'],
     ['-l', 'xl0lhi'],
+    ['-l', 'ijy4ch'],  # user deleted post
+    ['-l', 'kw4wjm'],  # post from banned subreddit
 ))
 def test_cli_scrape_general(test_args: list[str], tmp_path: Path):
     runner = CliRunner()
@@ -44,3 +46,19 @@ def test_cli_scrape_general(test_args: list[str], tmp_path: Path):
     assert result.exit_code == 0
     assert 'Downloaded submission' in result.output
     assert 'Record for entry item' in result.output
+
+
+@pytest.mark.online
+@pytest.mark.reddit
+@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
+@pytest.mark.parametrize('test_args', (
+    ['-l', 'ijy4ch'],  # user deleted post
+    ['-l', 'kw4wjm'],  # post from banned subreddit
+))
+def test_cli_scrape_soft_fail(test_args: list[str], tmp_path: Path):
+    runner = CliRunner()
+    test_args = create_basic_args_for_cloner_runner(test_args, tmp_path)
+    result = runner.invoke(cli, test_args)
+    assert result.exit_code == 0
+    assert 'Downloaded submission' not in result.output
+    assert 'Record for entry item' not in result.output
From 9ee13aea23b7f7932ab737d1cc481bbc0bedbd39 Mon Sep 17 00:00:00 2001
From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com>
Date: Sat, 26 Nov 2022 14:36:19 -0500
Subject: [PATCH 12/76] Update tests

The user referenced in two tests was suspended. Updated hashes and the
yt-dlp version. Removed the success check on a known failure.
---
 requirements.txt                                  | 2 +-
 tests/integration_tests/test_clone_integration.py | 2 --
 .../fallback_downloaders/test_ytdlp_fallback.py   | 6 +++---
 tests/site_downloaders/test_vreddit.py            | 2 +-
 4 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 83378f0..62e6925 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,4 +6,4 @@ ffmpeg-python>=0.2.0
 praw>=7.2.0
 pyyaml>=5.4.1
 requests>=2.25.1
-yt-dlp>=2022.9.1
\ No newline at end of file
+yt-dlp>=2022.11.11
diff --git a/tests/integration_tests/test_clone_integration.py b/tests/integration_tests/test_clone_integration.py
index 288793b..8046687 100644
--- a/tests/integration_tests/test_clone_integration.py
+++ b/tests/integration_tests/test_clone_integration.py
@@ -36,8 +36,6 @@ def create_basic_args_for_cloner_runner(test_args: list[str], tmp_path: Path):
     ['-s', 'TrollXChromosomes/', '-L', 1],
     ['-l', 'eiajjw'],
     ['-l', 'xl0lhi'],
-    ['-l', 'ijy4ch'],  # user deleted post
-    ['-l', 'kw4wjm'],  # post from banned subreddit
 ))
 def test_cli_scrape_general(test_args: list[str], tmp_path: Path):
     runner = CliRunner()
diff --git a/tests/site_downloaders/fallback_downloaders/test_ytdlp_fallback.py b/tests/site_downloaders/fallback_downloaders/test_ytdlp_fallback.py
index 92ba27d..29e72c5 100644
--- a/tests/site_downloaders/fallback_downloaders/test_ytdlp_fallback.py
+++ b/tests/site_downloaders/fallback_downloaders/test_ytdlp_fallback.py
@@ -15,7 +15,7 @@ from bdfr.site_downloaders.fallback_downloaders.ytdlp_fallback import YtdlpFallb
     ('https://www.youtube.com/watch?v=P19nvJOmqCc', True),
     ('https://www.example.com/test', False),
     ('https://milesmatrix.bandcamp.com/album/la-boum/', False),
-    ('https://v.redd.it/54i8fvzev3u81', True),
+    ('https://v.redd.it/dlr54z8p182a1', True),
 ))
 def test_can_handle_link(test_url: str, expected: bool):
     result = YtdlpFallback.can_handle_link(test_url)
@@ -36,8 +36,8 @@ def test_info_extraction_bad(test_url: str):
 @pytest.mark.parametrize(('test_url', 'expected_hash'), (
     ('https://streamable.com/dt46y', 'b7e465adaade5f2b6d8c2b4b7d0a2878'),
     ('https://streamable.com/t8sem', '49b2d1220c485455548f1edbc05d4ecf'),
-    ('https://www.reddit.com/r/specializedtools/comments/n2nw5m/bamboo_splitter/', '03087ce64f88f438bad6849858c9b7f0'),
-    ('https://v.redd.it/9z1dnk3xr5k61', '9ce39c8e46b6534a0b3f164a792d51c8'),
+    ('https://www.reddit.com/r/specializedtools/comments/n2nw5m/bamboo_splitter/', '6c6ff46e04b4e33a755ae2a9b5a45ac5'),
+    ('https://v.redd.it/9z1dnk3xr5k61', '226cee353421c7aefb05c92424cc8cdd'),
 ))
 def test_find_resources(test_url: str, expected_hash: str):
     test_submission = MagicMock()
diff --git a/tests/site_downloaders/test_vreddit.py b/tests/site_downloaders/test_vreddit.py
index da05c1b..54ffcf8 100644
--- a/tests/site_downloaders/test_vreddit.py
+++ b/tests/site_downloaders/test_vreddit.py
@@ -13,7 +13,7 @@ from bdfr.site_downloaders.vreddit import VReddit
 @pytest.mark.online
 @pytest.mark.slow
 @pytest.mark.parametrize(('test_url', 'expected_hash'), (
-    ('https://www.reddit.com/user/Xomb_Forever/comments/u5p2kj/hold_up/', '379ef5cd87203544d51caee31e72d210'),
+    ('https://reddit.com/r/Unexpected/comments/z4xsuj/omg_thats_so_cute/', '1ffab5e5c0cc96db18108e4f37e8ca7f'),
 ))
 def test_find_resources_good(test_url: str, expected_hash: str):
     test_submission = MagicMock()
From 48c96beba24bbb7637e1e01200e3e146596496b4 Mon Sep 17 00:00:00 2001
From: Soulsuck24 <79275800+Soulsuck24@users.noreply.github.com>
Date: Sun, 27 Nov 2022 18:07:43 -0500
Subject: [PATCH 13/76] Redgifs improvements

Add check to verify token was received.
Update headers sent to content API.
Add availability check for videos to resolve last part of #472 where only
SD version is available.
---
 bdfr/site_downloaders/redgifs.py       | 13 +++++++++++--
 tests/site_downloaders/test_redgifs.py | 27 ++++++++++++++++++++++++--
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/bdfr/site_downloaders/redgifs.py b/bdfr/site_downloaders/redgifs.py
index bbaef12..dd19413 100644
--- a/bdfr/site_downloaders/redgifs.py
+++ b/bdfr/site_downloaders/redgifs.py
@@ -2,7 +2,7 @@
 
 import json
 import re
-import urllib.parse
+import requests
 from typing import Optional
 
 from praw.models import Submission
@@ -29,7 +29,13 @@ class Redgifs(BaseDownloader):
             raise SiteDownloaderError(f'Could not extract Redgifs ID from {url}')
 
         auth_token = json.loads(Redgifs.retrieve_url('https://api.redgifs.com/v2/auth/temporary').text)['token']
+        if not auth_token:
+            raise SiteDownloaderError('Unable to retrieve Redgifs API token')
+
         headers = {
+            'referer': 'https://www.redgifs.com/',
+            'origin': 'https://www.redgifs.com',
+            'content-type': 'application/json',
             'Authorization': f'Bearer {auth_token}',
         }
 
@@ -46,7 +52,10 @@ class Redgifs(BaseDownloader):
         out = set()
         try:
             if response_json['gif']['type'] == 1:  # type 1 is a video
-                out.add(response_json['gif']['urls']['hd'])
+                if requests.get(response_json['gif']['urls']['hd'], headers=headers).ok:
+                    out.add(response_json['gif']['urls']['hd'])
+                else:
+                    out.add(response_json['gif']['urls']['sd'])
             elif response_json['gif']['type'] == 2:  # type 2 is an image
                 if response_json['gif']['gallery']:
                     content = Redgifs.retrieve_url(
diff --git a/tests/site_downloaders/test_redgifs.py b/tests/site_downloaders/test_redgifs.py
index b73ee95..9a6d132 100644
--- a/tests/site_downloaders/test_redgifs.py
+++ b/tests/site_downloaders/test_redgifs.py
@@ -41,8 +41,7 @@ def test_get_link(test_url: str, expected: set[str]):
     ('https://redgifs.com/watch/springgreendecisivetaruca', {'8dac487ac49a1f18cc1b4dabe23f0869'}),
     ('https://redgifs.com/watch/leafysaltydungbeetle', {'076792c660b9c024c0471ef4759af8bd'}),
     ('https://www.redgifs.com/watch/palegoldenrodrawhalibut', {'46d5aa77fe80c6407de1ecc92801c10e'}),
-    ('https://redgifs.com/watch/hollowintentsnowyowl',
-     {'5ee51fa15e0a58e98f11dea6a6cca771'}),
+    ('https://redgifs.com/watch/hollowintentsnowyowl', {'5ee51fa15e0a58e98f11dea6a6cca771'}),
     ('https://www.redgifs.com/watch/lustrousstickywaxwing',
      {'b461e55664f07bed8d2f41d8586728fa',
       '30ba079a8ed7d7adf17929dc3064c10f',
@@ -60,3 +59,27 @@ def test_download_resource(test_url: str, expected_hashes: set[str]):
     [res.download() for res in results]
     hashes = set([res.hash.hexdigest() for res in results])
     assert hashes == set(expected_hashes)
+
+
+@pytest.mark.online
+@pytest.mark.parametrize(('test_url', 'expected_link', 'expected_hash'), (
+    ('https://redgifs.com/watch/flippantmemorablebaiji', {'FlippantMemorableBaiji-mobile.mp4'},
+     {'41a5fb4865367ede9f65fc78736f497a'}),
+    ('https://redgifs.com/watch/thirstyunfortunatewaterdragons', {'thirstyunfortunatewaterdragons-mobile.mp4'},
+     {'1a51dad8fedb594bdd84f027b3cbe8af'}),
+    ('https://redgifs.com/watch/conventionalplainxenopterygii', {'conventionalplainxenopterygii-mobile.mp4'},
+     {'2e1786b3337da85b80b050e2c289daa4'})
+))
+def test_hd_soft_fail(test_url: str, expected_link: set[str], expected_hash: set[str]):
+    link = Redgifs._get_link(test_url)
+    link = list(link)
+    patterns = [r'https://thumbs\d\.redgifs\.com/' + e + r'.*' for e in expected_link]
+    assert all([re.match(p, r) for p in patterns] for r in link)
+    mock_submission = Mock()
+    mock_submission.url = test_url
+    test_site = Redgifs(mock_submission)
+    results = test_site.find_resources()
+    assert all([isinstance(res, Resource) for res in results])
+    [res.download() for res in results]
+    hashes = set([res.hash.hexdigest() for res in results])
+    assert hashes == set(expected_hash)
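Note: the availability check distilled to a sketch (the `urls` mapping mirrors the `gif.urls` object in the API response handled above):

```python
import requests

def pick_playable_url(urls: dict, headers: dict) -> str:
    # Some Redgifs videos 404 in HD even though the API advertises an 'hd'
    # entry; probing with a GET and falling back to 'sd' covers that case,
    # which is the tail end of issue #472.
    if requests.get(urls['hd'], headers=headers).ok:
        return urls['hd']
    return urls['sd']
```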
From fecb65c53a2e47c966d24c104f7aa6e891cb93b2 Mon Sep 17 00:00:00 2001
From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com>
Date: Tue, 29 Nov 2022 11:48:24 -0500
Subject: [PATCH 14/76] Lint run

Linting run through various files. Mostly markdownlint.
---
 .github/ISSUE_TEMPLATE/bug_report.md          | 16 ++--
 .github/ISSUE_TEMPLATE/feature_request.md     |  3 +-
 .../ISSUE_TEMPLATE/site-support-request.md    |  4 +-
 README.md                                     | 91 +++++++++++--------
 bdfr/default_config.cfg                       |  2 +-
 devscripts/configure.ps1                      |  6 +-
 devscripts/configure.sh                       |  4 +-
 docs/ARCHITECTURE.md                          | 10 +-
 docs/CODE_OF_CONDUCT.md                       |  4 +-
 docs/CONTRIBUTING.md                          | 32 ++++---
 scripts/README.md                             | 10 +-
 scripts/extract_failed_ids.ps1                | 18 ++--
 scripts/extract_successful_ids.ps1            | 14 +--
 scripts/print_summary.ps1                     | 14 +--
 .../failed_resource_error.txt                 |  1 -
 .../example_logfiles/succeed_score_filter.txt |  2 +-
 setup.cfg                                     |  2 +-
 17 files changed, 130 insertions(+), 103 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index efc9757..e05bb36 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -10,20 +10,24 @@ assignees: ''
 - [ ] I am reporting a bug.
 - [ ] I am running the latest version of BDfR
 - [ ] I have read the [Opening an issue](https://github.com/aliparlakci/bulk-downloader-for-reddit/blob/master/docs/CONTRIBUTING.md#opening-an-issue)
-
+
 ## Description
+
 A clear and concise description of what the bug is.
 
 ## Command
-```
+
+```text
 Paste here the command(s) that causes the bug
 ```
 
-## Environment (please complete the following information):
- - OS: [e.g. Windows 10]
- - Python version: [e.g. 3.9.4]
+## Environment (please complete the following information)
+
+- OS: [e.g. Windows 10]
+- Python version: [e.g. 3.9.4]
 
 ## Logs
-```
+
+```text
 Paste the log output here.
 ```
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
index ce9f0b3..c286de6 100644
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -10,6 +10,7 @@ assignees: ''
 - [ ] I am requesting a feature.
 - [ ] I am running the latest version of BDfR
 - [ ] I have read the [Opening an issue](../../README.md#configuration)
-
+
 ## Description
+
 Clearly state the current situation and issues you experience. Then, explain how this feature would solve these issues and make life easier. Also, explain the feature with as many detail as possible.
diff --git a/.github/ISSUE_TEMPLATE/site-support-request.md b/.github/ISSUE_TEMPLATE/site-support-request.md
index fd400aa..2eea710 100644
--- a/.github/ISSUE_TEMPLATE/site-support-request.md
+++ b/.github/ISSUE_TEMPLATE/site-support-request.md
@@ -10,9 +10,11 @@ assignees: ''
 - [ ] I am requesting a site support.
 - [ ] I am running the latest version of BDfR
 - [ ] I have read the [Opening an issue](../../README.md#configuration)
-
+
 ## Site
+
 Provide a URL to domain of the site.
 
 ## Example posts
+
 Provide example reddit posts with the domain.
diff --git a/README.md b/README.md
index 2c245c6..7914308 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
 # Bulk Downloader for Reddit
+
 [![PyPI version](https://img.shields.io/pypi/v/bdfr.svg)](https://pypi.python.org/pypi/bdfr)
 [![PyPI downloads](https://img.shields.io/pypi/dm/bdfr)](https://pypi.python.org/pypi/bdfr)
 [![Python Test](https://github.com/aliparlakci/bulk-downloader-for-reddit/actions/workflows/test.yml/badge.svg?branch=master)](https://github.com/aliparlakci/bulk-downloader-for-reddit/actions/workflows/test.yml)
@@ -10,19 +11,24 @@ If you wish to open an issue, please read [the guide on opening issues](docs/CON
 Included in this README are a few example Bash tricks to get certain behaviour. For that, see [Common Command Tricks](#common-command-tricks).
 
 ## Installation
+
 *Bulk Downloader for Reddit* needs Python version 3.9 or above. Please update Python before installation to meet the requirement. Then, you can install it as such:
+
 ```bash
 python3 -m pip install bdfr --upgrade
 ```
+
 **To update BDFR**, run the above command again after the installation.
 
 ### AUR Package
+
 If on Arch Linux or derivative operating systems such as Manjaro, the BDFR can be installed through the AUR.
 
-- Latest Release: https://aur.archlinux.org/packages/python-bdfr/
-- Latest Development Build: https://aur.archlinux.org/packages/python-bdfr-git/
+- Latest Release: <https://aur.archlinux.org/packages/python-bdfr/>
+- Latest Development Build: <https://aur.archlinux.org/packages/python-bdfr-git/>
 
 ### Source code
+
 If you want to use the source code or make contributions, refer to [CONTRIBUTING](docs/CONTRIBUTING.md#preparing-the-environment-for-development)
 
 ## Usage
@@ -52,18 +58,23 @@ However, these commands are not enough. You should chain parameters in [Options]
 ```bash
 python3 -m bdfr download ./path/to/output --subreddit Python -L 10
 ```
+
 ```bash
 python3 -m bdfr download ./path/to/output --user reddituser --submitted -L 100
 ```
+
 ```bash
 python3 -m bdfr download ./path/to/output --user reddituser --submitted --all-comments --comment-context
 ```
+
 ```bash
 python3 -m bdfr download ./path/to/output --user me --saved --authenticate -L 25 --file-scheme '{POSTID}'
 ```
+
 ```bash
 python3 -m bdfr download ./path/to/output --subreddit 'Python, all, mindustry' -L 10 --make-hard-links
 ```
+
 ```bash
 python3 -m bdfr archive ./path/to/output --subreddit all --format yaml -L 500 --folder-scheme ''
 ```
@@ -87,6 +98,7 @@ subreddit:
 ```
 
 would be equilavent to (take note that in YAML there is `file_scheme` instead of `file-scheme`):
+
 ```bash
 python3 -m bdfr download ./path/to/output --skip mp4 --skip avi --file-scheme "{UPVOTES}_{REDDITOR}_{POSTID}_{DATE}" -L 10 -S top --subreddit EarthPorn --subreddit CityPorn
 ```
@@ -156,8 +168,8 @@ The following options are common between both the `archive` and `download` comma
 - `-m, --multireddit`
   - This is the name of a multireddit to add as a source
   - Can be specified multiple times
-    - This can be done by using `-m` multiple times
-    - Multireddits can also be used to provide CSV multireddits e.g. `-m 'chess, favourites'`
+  - This can be done by using `-m` multiple times
+  - Multireddits can also be used to provide CSV multireddits e.g. `-m 'chess, favourites'`
   - The specified multireddits must all belong to the user specified with the `--user` option
 - `-s, --subreddit`
   - This adds a subreddit as a source
@@ -237,7 +249,6 @@ The following options apply only to the `download` command. This command downloa
 - `--max-score-ratio`
   - This skips all submissions which have higher than specified upvote ratio
 
-
 ### Archiver Options
 
 The following options are for the `archive` command specifically.
@@ -291,18 +302,18 @@ For more details on the configuration file and the values therein, see [Configur
 
 The naming and folder schemes for the BDFR are both completely customisable. A number of different fields can be given which will be replaced with properties from a submission when downloading it. The scheme format takes the form of `{KEY}`, where `KEY` is a string from the below list.
 
- - `DATE`
- - `FLAIR`
- - `POSTID`
- - `REDDITOR`
- - `SUBREDDIT`
- - `TITLE`
- - `UPVOTES`
+- `DATE`
+- `FLAIR`
+- `POSTID`
+- `REDDITOR`
+- `SUBREDDIT`
+- `TITLE`
+- `UPVOTES`
 
 Each of these can be enclosed in curly bracket, `{}`, and included in the name. For example, to just title every downloaded post with the unique submission ID, you can use `{POSTID}`. Static strings can also be included, such as `download_{POSTID}` which will not change from submission to submission. For example, the previous string will result in the following submission file names:
 
- - `download_aaaaaa.png`
- - `download_bbbbbb.png`
+- `download_aaaaaa.png`
+- `download_bbbbbb.png`
 
 At least one key *must* be included in the file scheme, otherwise an error will be thrown. The folder scheme however, can be null or a simple static string. In the former case, all files will be placed in the folder specified with the `directory` argument. If the folder scheme is a static string, then all submissions will be placed in a folder of that name. In both cases, there will be no separation between all submissions.
@@ -312,19 +323,19 @@ It is highly recommended that the file name scheme contain the parameter `{POSTI
 
 The configuration files are, by default, stored in the configuration directory for the user. This differs depending on the OS that the BDFR is being run on. For Windows, this will be:
 
- - `C:\Users\\AppData\Local\BDFR\bdfr`
+- `C:\Users\\AppData\Local\BDFR\bdfr`
 
 If Python has been installed through the Windows Store, the folder will appear in a different place. Note that the hash included in the file path may change from installation to installation.
 
- - `C:\Users\\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\Local\BDFR\bdfr`
+- `C:\Users\\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\Local\BDFR\bdfr`
 
 On Mac OSX, this will be:
 
- - `~/Library/Application Support/bdfr`.
+- `~/Library/Application Support/bdfr`.
 
 Lastly, on a Linux system, this will be:
 
- - `~/.config/bdfr/`
+- `~/.config/bdfr/`
 
 The logging output for each run of the BDFR will be saved to this directory in the file `log_output.txt`. If you need to submit a bug, it is this file that you will need to submit with the report.
 
 ### Configuration File
 
 The `config.cfg` is the file that supplies the BDFR with the configuration to use. At the moment, the following keys **must** be included in the configuration file supplied.
 
- - `client_id`
- - `client_secret`
- - `scopes`
+- `client_id`
+- `client_secret`
+- `scopes`
 
 The following keys are optional, and defaults will be used if they cannot be found.
 
- - `backup_log_count`
- - `max_wait_time`
- - `time_format`
- - `disabled_modules`
+- `backup_log_count`
+- `max_wait_time`
+- `time_format`
+- `disabled_modules`
 
 All of these should not be modified unless you know what you're doing, as the default values will enable the BDFR to function just fine. A configuration is included in the BDFR when it is installed, and this will be placed in the configuration directory as the default.
@@ -360,12 +371,16 @@ The individual modules of the BDFR, used to download submissions from websites, 
 Modules can be disabled through the command line interface for the BDFR or more permanently in the configuration file via the `disabled_modules` option. The list of downloaders that can be disabled are the following. Note that they are case-insensitive.
 
 - `Direct`
+- `DelayForReddit`
 - `Erome`
 - `Gallery` (Reddit Image Galleries)
 - `Gfycat`
 - `Imgur`
+- `PornHub`
 - `Redgifs`
 - `SelfPost` (Reddit Text Post)
+- `Vidble`
+- `VReddit` (Reddit Video Post)
 - `Youtube`
 - `YoutubeDlFallback`
@@ -393,17 +408,19 @@ The logfiles that the BDFR outputs are consistent and quite detailed and in a fo
 ## List of currently supported sources
 
- - Direct links (links leading to a file)
- - Erome
- - Gfycat
- - Gif Delivery Network
- - Imgur
- - Reddit Galleries
- - Reddit Text Posts
- - Reddit Videos
- - Redgifs
- - YouTube
- - Streamable
+- Direct links (links leading to a file)
+- Delay for Reddit
+- Erome
+- Gfycat
+- Gif Delivery Network
+- Imgur
+- Reddit Galleries
+- Reddit Text Posts
+- Reddit Videos
+- Redgifs
+- Vidble
+- YouTube
+  - Any source supported by [YT-DLP](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md) should be compatable
 
 ## Contributing
diff --git a/bdfr/default_config.cfg b/bdfr/default_config.cfg
index c601152..2b2976f 100644
--- a/bdfr/default_config.cfg
+++ b/bdfr/default_config.cfg
@@ -4,4 +4,4 @@ client_secret = 7CZHY6AmKweZME5s50SfDGylaPg
 scopes = identity, history, read, save, mysubreddits
 backup_log_count = 3
 max_wait_time = 120
-time_format = ISO
\ No newline at end of file
+time_format = ISO
diff --git a/devscripts/configure.ps1 b/devscripts/configure.ps1
index b096266..f5a2152 100644
--- a/devscripts/configure.ps1
+++ b/devscripts/configure.ps1
@@ -1,5 +1,5 @@
 if (-not ([string]::IsNullOrEmpty($env:REDDIT_TOKEN)))
 {
-    copy .\\bdfr\\default_config.cfg .\\test_config.cfg
-    echo "`nuser_token = $env:REDDIT_TOKEN" >> ./test_config.cfg
-}
\ No newline at end of file
+    Copy-Item .\\bdfr\\default_config.cfg .\\test_config.cfg
+    Write-Output "`nuser_token = $env:REDDIT_TOKEN" >> ./test_config.cfg
+}
diff --git a/devscripts/configure.sh b/devscripts/configure.sh
index d9c96df..f4528b1 100755
--- a/devscripts/configure.sh
+++ b/devscripts/configure.sh
@@ -1,4 +1,6 @@
-if [ ! -z "$REDDIT_TOKEN" ]
+#!/bin/bash
+
+if [ -n "$REDDIT_TOKEN" ]
 then
     cp ./bdfr/default_config.cfg ./test_config.cfg
     echo -e "\nuser_token = $REDDIT_TOKEN" >> ./test_config.cfg
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index 7b69f99..33d4297 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -18,18 +18,18 @@ Another major part of the ethos of the design is DOTADIW, Do One Thing And Do It
 
 The BDFR is organised around a central object, the RedditDownloader class. The Archiver object extends and inherits from this class.
 
- 1. The RedditDownloader parses all the arguments and configuration options, held in the Configuration object, and creates a variety of internal objects for use, such as the file name formatter, download filter, etc.
-
+
  2. The RedditDownloader scrapes raw submissions from Reddit via several methods relating to different sources. A source is defined as a single stream of submissions from a subreddit, multireddit, or user list.
 
- 3. These raw submissions are passed to the DownloaderFactory class to select the specialised downloader class to use. Each of these are for a specific website or link type, with some catch-all classes like Direct.
+ 3. These raw submissions are passed to the DownloaderFactory class to select the specialised downloader class to use. Each of these are for a specific website or link type, with some catch-all classes like Direct.
 
- 4. The BaseDownloader child, spawned by DownloaderFactory, takes the link and does any necessary processing to find the direct link to the actual resource.
+ 4. The BaseDownloader child, spawned by DownloaderFactory, takes the link and does any necessary processing to find the direct link to the actual resource.
 
  5. This is returned to the RedditDownloader in the form of a Resource object. This holds the URL and some other information for the final resource.
 
  6. The Resource is passed through the DownloadFilter instantiated in step 1.
-
+
  7. The destination file name for the Resource is calculated. If it already exists, then the Resource will be discarded.
 
  8. Here the actual data is downloaded to the Resource and a hash calculated which is used to find duplicates.
diff --git a/docs/CODE_OF_CONDUCT.md b/docs/CODE_OF_CONDUCT.md
index 26edfa9..fe0374d 100644
--- a/docs/CODE_OF_CONDUCT.md
+++ b/docs/CODE_OF_CONDUCT.md
@@ -69,8 +69,6 @@ members of the project's leadership.
 ## Attribution
 
 This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
-available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
+available at <https://www.contributor-covenant.org/version/1/4/code-of-conduct.html>
 
 [homepage]: https://www.contributor-covenant.org
-
-
diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md
index e08f1c2..5aafda2 100644
--- a/docs/CONTRIBUTING.md
+++ b/docs/CONTRIBUTING.md
@@ -11,19 +11,21 @@ All communication on GitHub, Discord, email, or any other medium must conform to
 **Before opening a new issue**, be sure that no issues regarding your problem already exist. If a similar issue exists, try to contribute to the issue.
 
 ### Bugs
-When opening an issue about a bug, **please provide the full log file for the run in which the bug occurred**. This log file is named `log_output.txt` in the configuration folder. Check the [README](../README.md) for information on where this is. This log file will contain all the information required for the developers to recreate the bug.
 
-If you do not have or cannot find the log file, then at minimum please provide the **Reddit ID for the submission** or comment which caused the issue. Also copy in the command that you used to run the BDFR from the command line, as that will also provide helpful information when trying to find and fix the bug. If needed, more information will be asked in the thread of the bug.
+When opening an issue about a bug, **please provide the full log file for the run in which the bug occurred**. This log file is named `log_output.txt` in the configuration folder. Check the [README](../README.md) for information on where this is. This log file will contain all the information required for the developers to recreate the bug.
+
+If you do not have or cannot find the log file, then at minimum please provide the **Reddit ID for the submission** or comment which caused the issue. Also copy in the command that you used to run the BDFR from the command line, as that will also provide helpful information when trying to find and fix the bug. If needed, more information will be asked in the thread of the bug.
 
 ### Feature requests
-In the case of requesting a feature or an enhancement, there are fewer requirements. However, please be clear in what you would like the BDFR to do and also how the feature/enhancement would be used or would be useful to more people. It is crucial that the feature is justified. Any feature request without a concrete reason for it to be implemented has a very small chance to get accepted. Be aware that proposed enhancements may be rejected for multiple reasons, or no reason, at the discretion of the developers.
+
+In the case of requesting a feature or an enhancement, there are fewer requirements. However, please be clear in what you would like the BDFR to do and also how the feature/enhancement would be used or would be useful to more people. It is crucial that the feature is justified. Any feature request without a concrete reason for it to be implemented has a very small chance to get accepted. Be aware that proposed enhancements may be rejected for multiple reasons, or no reason, at the discretion of the developers.
 
 ## Pull Requests
 
 Before creating a pull request (PR), check out [ARCHITECTURE](ARCHITECTURE.md) for a short introduction to the way that the BDFR is coded and how the code is organised. Also read the [Style Guide](#style-guide) section below before actually writing any code.
 
 Once you have done both of these, the below list shows the path that should be followed when writing a PR.
-
+
  1. If an issue does not already exist, open one that will relate to the PR.
  2. Ensure that any changes fit into the architecture specified above.
  3. Ensure that you have written tests that cover the new code.
  4. Ensure that the code passes the test suite.
  5. Ensure that any new code conforms to the style guide.
  6. Open a pull request that references the relevant issue.
  7. Expect changes or suggestions and heed the Code of Conduct. We're all volunteers here.
 
-Someone will review your pull request as soon as possible, but remember that all maintainers are volunteers and this won't happen immediately. Once it is approved, congratulations! Your code is now part of the BDFR.
+Someone will review your pull request as soon as possible, but remember that all maintainers are volunteers and this won't happen immediately. Once it is approved, congratulations! Your code is now part of the BDFR.
 
 ## Preparing the environment for development
 
-Bulk Downloader for Reddit requires Python 3.9 at minimum. First, ensure that your Python installation satisfies this.
+Bulk Downloader for Reddit requires Python 3.9 at minimum. First, ensure that your Python installation satisfies this.
 
 BDfR is built in a way that it can be packaged and installed via `pip`. This places BDfR next to other Python packages and enables you to run the program from any directory. Since it is managed by pip, you can also uninstall it.
 To install the program, clone the repository and run pip inside the project's root directory:
+
 ```bash
-$ git clone https://github.com/aliparlakci/bulk-downloader-for-reddit.git
-$ cd ./bulk-downloader-for-reddit
-$ python3 -m pip install -e .
+git clone https://github.com/aliparlakci/bulk-downloader-for-reddit.git
+cd ./bulk-downloader-for-reddit
+python3 -m pip install -e .
 ```
 
-**`-e`** parameter creates a link to that folder. That is, any change inside the folder affects the package immidiately. So, when developing, you can be sure that the package is not stale and Python is always running your latest changes. (Due to this linking, moving/removing/renaming the folder might break it)
+**`-e`** parameter creates a link to that folder. That is, any change inside the folder affects the package immidiately. So, when developing, you can be sure that the package is not stale and Python is always running your latest changes. (Due to this linking, moving/removing/renaming the folder might break it)
 
 Then, you can run the program from anywhere in your disk as such:
+
 ```bash
 $ python3 -m bdfr
 ```
@@ -104,20 +108,20 @@ To exclude one or more marks, the following command can be used, substituting the
 pytest -m "not online"
 pytest -m "not reddit and not authenticated"
 ```
-
+
 ### Configuration for authenticated tests
 
 There should be configuration file `test_config.cfg` in the project's root directory to be able to run the integration tests with reddit authentication. See how to create such files [here](../README.md#configuration). The easiest way of creating this file is copying your existing `default_config.cfg` file from the path stated in the previous link and renaming it to `test_config.cfg` Be sure that user_token key exists in test_config.cfg.
-
+
 ---
-
+
 For more details, review the pytest documentation that is freely available online. Many IDEs also provide integrated functionality to run and display the results from tests, and almost all of them support pytest in some capacity. This would be the recommended method due to the additional debugging and general capabilities.
 
 ### Writing Tests
 
-When writing tests, ensure that they follow the style guide. The BDFR uses pytest to run tests. Wherever possible, parameterise tests, even if you only have one test case. This makes it easier to expand in the future, as the ultimate goal is to have multiple test cases for every test, instead of just one.
+When writing tests, ensure that they follow the style guide. The BDFR uses pytest to run tests. Wherever possible, parameterise tests, even if you only have one test case. This makes it easier to expand in the future, as the ultimate goal is to have multiple test cases for every test, instead of just one.
 
 If required, use of mocks is expected to simplify tests and reduce the resources or complexity required. Tests should be as small as possible and test as small a part of the code as possible. Comprehensive or integration tests are run with the `click` framework and are located in their own file.
diff --git a/scripts/README.md b/scripts/README.md
index 4bb098b..2f77eb5 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -2,10 +2,10 @@
 
 Due to the verboseness of the logs, a great deal of information can be gathered quite easily from the BDFR's logfiles. In this folder, there is a selection of scripts that parse these logs, scraping useful bits of information. Since the logfiles are recurring patterns of strings, it is a fairly simple matter to write scripts that utilise tools included on most Linux systems.
 
- - [Script to extract all successfully downloaded IDs](#extract-all-successfully-downloaded-ids)
- - [Script to extract all failed download IDs](#extract-all-failed-ids)
- - [Timestamp conversion](#converting-bdfrv1-timestamps-to-bdfrv2-timestamps)
- - [Printing summary statistics for a run](#printing-summary-statistics)
+- [Script to extract all successfully downloaded IDs](#extract-all-successfully-downloaded-ids)
+- [Script to extract all failed download IDs](#extract-all-failed-ids)
+- [Timestamp conversion](#converting-bdfrv1-timestamps-to-bdfrv2-timestamps)
+- [Printing summary statistics for a run](#printing-summary-statistics)
 
 ## Extract all Successfully Downloaded IDs
@@ -58,7 +58,7 @@ A simple script has been included to print sumamry statistics for a run of the B
 
 This will create an output like the following:
 
-```
+```text
 Downloaded submissions: 250
 Failed downloads: 103
 Files already downloaded: 20073
diff --git a/scripts/extract_failed_ids.ps1 b/scripts/extract_failed_ids.ps1
index be2d2cb..4820d04 100644
--- a/scripts/extract_failed_ids.ps1
+++ b/scripts/extract_failed_ids.ps1
@@ -1,21 +1,21 @@
 if (Test-Path -Path $args[0] -PathType Leaf) {
-  $file=$args[0]
+    $file=$args[0]
 }
 else {
-  Write-Host "CANNOT FIND LOG FILE"
-  Exit 1
+    Write-Host "CANNOT FIND LOG FILE"
+    Exit 1
 }
 
-if ($args[1] -ne $null) {
-  $output=$args[1]
-  Write-Host "Outputting IDs to $output"
+if ($null -ne $args[1]) {
+    $output=$args[1]
+    Write-Host "Outputting IDs to $output"
 }
 else {
-  $output="./failed.txt"
+    $output="./failed.txt"
 }
 
-Select-String -Path $file -Pattern "Could not download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 11 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output
+Select-String -Path $file -Pattern "Could not download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 11 | Select-Object -First 1 } | ForEach-Object { $_.substring(0,$_.Length-1) } >> $output
 Select-String -Path $file -Pattern "Failed to download resource" | ForEach-Object { -split $_.Line | Select-Object -Skip 14 | Select-Object -First 1 } >> $output
-Select-String -Path $file -Pattern "failed to download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output
+Select-String -Path $file -Pattern "failed to download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } | ForEach-Object { $_.substring(0,$_.Length-1) } >> $output
 Select-String -Path $file -Pattern "Failed to write file" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } >> $output
 Select-String -Path $file -Pattern "skipped due to disabled module" | ForEach-Object { -split $_.Line | Select-Object -Skip 8 | Select-Object -First 1 } >> $output
diff --git a/scripts/extract_successful_ids.ps1 b/scripts/extract_successful_ids.ps1
index 00722f1..70c463b 100644
--- a/scripts/extract_successful_ids.ps1
+++ b/scripts/extract_successful_ids.ps1
@@ -1,17 +1,17 @@
 if (Test-Path -Path $args[0] -PathType Leaf) {
-  $file=$args[0]
+    $file=$args[0]
 }
 else {
-  Write-Host "CANNOT FIND LOG FILE"
-  Exit 1
+    Write-Host "CANNOT FIND LOG FILE"
+    Exit 1
 }
 
-if ($args[1] -ne $null) {
-  $output=$args[1]
-  Write-Host "Outputting IDs to $output"
+if ($null -ne $args[1]) {
+    $output=$args[1]
+    Write-Host "Outputting IDs to $output"
 }
 else {
-  $output="./successful.txt"
+    $output="./successful.txt"
 }
 
 Select-String -Path $file -Pattern "Downloaded submission" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output
diff --git a/scripts/print_summary.ps1 b/scripts/print_summary.ps1
index 5d85b09..1428a86 100644
--- a/scripts/print_summary.ps1
+++ b/scripts/print_summary.ps1
@@ -1,17 +1,17 @@
 if (Test-Path -Path $args[0] -PathType Leaf) {
-  $file=$args[0]
+    $file=$args[0]
 }
 else {
-  Write-Host "CANNOT FIND LOG FILE"
-  Exit 1
+    Write-Host "CANNOT FIND LOG FILE"
+    Exit 1
 }
 
-if ($args[1] -ne $null) {
-  $output=$args[1]
-  Write-Host "Outputting IDs to $output"
+if ($null -ne $args[1]) {
+    $output=$args[1]
+    Write-Host "Outputting IDs to $output"
 }
 else {
-  $output="./successful.txt"
+    $output="./successful.txt"
 }
 
 Write-Host -NoNewline "Downloaded submissions: "
diff --git a/scripts/tests/example_logfiles/failed_resource_error.txt b/scripts/tests/example_logfiles/failed_resource_error.txt
index c2ba24c..ef477eb 100644
--- a/scripts/tests/example_logfiles/failed_resource_error.txt
+++ b/scripts/tests/example_logfiles/failed_resource_error.txt
@@ -1,2 +1 @@
 [2021-06-12 11:18:25,794 - bdfr.downloader - ERROR] - Failed to download resource https://i.redd.it/61fniokpjq471.jpg in submission nxv3dt with downloader Direct: Unrecoverable error requesting resource: HTTP Code 404
-
diff --git a/scripts/tests/example_logfiles/succeed_score_filter.txt b/scripts/tests/example_logfiles/succeed_score_filter.txt
index 8f31ef7..6430a34 100644
--- a/scripts/tests/example_logfiles/succeed_score_filter.txt
+++ b/scripts/tests/example_logfiles/succeed_score_filter.txt
@@ -1,2 +1,2 @@
 [2022-07-23 14:04:14,095 - bdfr.downloader - DEBUG] - Submission ljyy27 filtered due to score 15 < [50]
-[2022-07-23 14:04:14,104 - bdfr.downloader - DEBUG] - Submission ljyy27 filtered due to score 16 > [1]
\ No newline at end of file
+[2022-07-23 14:04:14,104 - bdfr.downloader - DEBUG] - Submission ljyy27 filtered due to score 16 > [1]
diff --git a/setup.cfg b/setup.cfg
index 67a1deb..725f372 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -10,7 +10,7 @@ author_email = parlakciali@gmail.com
 maintainer = Serene Arc
 maintainer_email = serenical@gmail.com
 license = GPLv3
-classifiers = 
+classifiers =
     Programming Language :: Python :: 3
     License :: OSI Approved :: GNU General Public License v3 (GPLv3)
    Natural Language :: English
From 5cb3c2c6359ca9a3bf0c3621bb0ac7ad649a2730 Mon Sep 17 00:00:00 2001
From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com>
Date: Tue, 29 Nov 2022 12:48:34 -0500
Subject: [PATCH 15/76] Update README.md

These options are not available in the `download` command, only `archive`.
---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 7914308..d0fecb1 100644
--- a/README.md
+++ b/README.md
@@ -63,10 +63,6 @@ python3 -m bdfr download ./path/to/output --subreddit Python -L 10
 python3 -m bdfr download ./path/to/output --user reddituser --submitted -L 100
 ```
 
-```bash
-python3 -m bdfr download ./path/to/output --user reddituser --submitted --all-comments --comment-context
-```
-
 ```bash
 python3 -m bdfr download ./path/to/output --user me --saved --authenticate -L 25 --file-scheme '{POSTID}'
 ```
@@ -75,6 +71,10 @@ python3 -m bdfr download ./path/to/output --user me --saved --authenticate -L 25
 python3 -m bdfr download ./path/to/output --subreddit 'Python, all, mindustry' -L 10 --make-hard-links
 ```
 
+```bash
+python3 -m bdfr archive ./path/to/output --user reddituser --submitted --all-comments --comment-context
+```
+
 ```bash
 python3 -m bdfr archive ./path/to/output --subreddit all --format yaml -L 500 --folder-scheme ''
 ```
Write-Host "Outputting IDs to $output" } else { - $output="./successful.txt" + $output="./successful.txt" } Select-String -Path $file -Pattern "Downloaded submission" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output diff --git a/scripts/print_summary.ps1 b/scripts/print_summary.ps1 index 5d85b09..1428a86 100644 --- a/scripts/print_summary.ps1 +++ b/scripts/print_summary.ps1 @@ -1,17 +1,17 @@ if (Test-Path -Path $args[0] -PathType Leaf) { - $file=$args[0] + $file=$args[0] } else { - Write-Host "CANNOT FIND LOG FILE" - Exit 1 + Write-Host "CANNOT FIND LOG FILE" + Exit 1 } -if ($args[1] -ne $null) { - $output=$args[1] - Write-Host "Outputting IDs to $output" +if ($null -ne $args[1]) { + $output=$args[1] + Write-Host "Outputting IDs to $output" } else { - $output="./successful.txt" + $output="./successful.txt" } Write-Host -NoNewline "Downloaded submissions: " diff --git a/scripts/tests/example_logfiles/failed_resource_error.txt b/scripts/tests/example_logfiles/failed_resource_error.txt index c2ba24c..ef477eb 100644 --- a/scripts/tests/example_logfiles/failed_resource_error.txt +++ b/scripts/tests/example_logfiles/failed_resource_error.txt @@ -1,2 +1 @@ [2021-06-12 11:18:25,794 - bdfr.downloader - ERROR] - Failed to download resource https://i.redd.it/61fniokpjq471.jpg in submission nxv3dt with downloader Direct: Unrecoverable error requesting resource: HTTP Code 404 - diff --git a/scripts/tests/example_logfiles/succeed_score_filter.txt b/scripts/tests/example_logfiles/succeed_score_filter.txt index 8f31ef7..6430a34 100644 --- a/scripts/tests/example_logfiles/succeed_score_filter.txt +++ b/scripts/tests/example_logfiles/succeed_score_filter.txt @@ -1,2 +1,2 @@ [2022-07-23 14:04:14,095 - bdfr.downloader - DEBUG] - Submission ljyy27 filtered due to score 15 < [50] -[2022-07-23 14:04:14,104 - bdfr.downloader - DEBUG] - Submission ljyy27 filtered due to score 16 > [1] \ No newline at end of file +[2022-07-23 14:04:14,104 - bdfr.downloader - DEBUG] - Submission ljyy27 filtered due to score 16 > [1] diff --git a/setup.cfg b/setup.cfg index 67a1deb..725f372 100644 --- a/setup.cfg +++ b/setup.cfg @@ -10,7 +10,7 @@ author_email = parlakciali@gmail.com maintainer = Serene Arc maintainer_email = serenical@gmail.com license = GPLv3 -classifiers = +classifiers = Programming Language :: Python :: 3 License :: OSI Approved :: GNU General Public License v3 (GPLv3) Natural Language :: English From 5cb3c2c6359ca9a3bf0c3621bb0ac7ad649a2730 Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Tue, 29 Nov 2022 12:48:34 -0500 Subject: [PATCH 15/76] Update README.md Options not available to download, only archive. 
---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 7914308..d0fecb1 100644
--- a/README.md
+++ b/README.md
@@ -63,10 +63,6 @@ python3 -m bdfr download ./path/to/output --subreddit Python -L 10
 python3 -m bdfr download ./path/to/output --user reddituser --submitted -L 100
 ```
 
-```bash
-python3 -m bdfr download ./path/to/output --user reddituser --submitted --all-comments --comment-context
-```
-
 ```bash
 python3 -m bdfr download ./path/to/output --user me --saved --authenticate -L 25 --file-scheme '{POSTID}'
 ```
@@ -75,6 +71,10 @@ python3 -m bdfr download ./path/to/output --user me --saved --authenticate -L 25
 python3 -m bdfr download ./path/to/output --subreddit 'Python, all, mindustry' -L 10 --make-hard-links
 ```
 
+```bash
+python3 -m bdfr archive ./path/to/output --user reddituser --submitted --all-comments --comment-context
+```
+
 ```bash
 python3 -m bdfr archive ./path/to/output --subreddit all --format yaml -L 500 --folder-scheme ''
 ```

From 175513fbb70054623d448259acebbacc2b06c61f Mon Sep 17 00:00:00 2001
From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com>
Date: Tue, 29 Nov 2022 22:38:45 -0500
Subject: [PATCH 16/76] Add console entry point for pipx support

Adds support for pipx via a console entry point.

closes #702
---
 README.md            | 123 +++++++++++++++++++++++++------------------
 docs/CONTRIBUTING.md |   2 +-
 setup.cfg            |   6 ++-
 3 files changed, 79 insertions(+), 52 deletions(-)

diff --git a/README.md b/README.md
index 2c245c6..5aae6f6 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
 # Bulk Downloader for Reddit
+
 [![PyPI version](https://img.shields.io/pypi/v/bdfr.svg)](https://pypi.python.org/pypi/bdfr)
 [![PyPI downloads](https://img.shields.io/pypi/dm/bdfr)](https://pypi.python.org/pypi/bdfr)
 [![Python Test](https://github.com/aliparlakci/bulk-downloader-for-reddit/actions/workflows/test.yml/badge.svg?branch=master)](https://github.com/aliparlakci/bulk-downloader-for-reddit/actions/workflows/test.yml)
@@ -10,19 +11,30 @@ If you wish to open an issue, please read [the guide on opening issues](docs/CON
 Included in this README are a few example Bash tricks to get certain behaviour. For that, see [Common Command Tricks](#common-command-tricks).
 
 ## Installation
-*Bulk Downloader for Reddit* needs Python version 3.9 or above. Please update Python before installation to meet the requirement. Then, you can install it as such:
+
+*Bulk Downloader for Reddit* needs Python version 3.9 or above. Please update Python before installation to meet the requirement. Then, you can install it via pip with:
+
 ```bash
 python3 -m pip install bdfr --upgrade
 ```
+
+or with [pipx](https://pypa.github.io/pipx) with:
+
+```bash
+python3 -m pipx install bdfr --upgrade
+```
+
 **To update BDFR**, run the above command again after the installation.
 
 ### AUR Package
+
 If on Arch Linux or derivative operating systems such as Manjaro, the BDFR can be installed through the AUR.
 
-- Latest Release: https://aur.archlinux.org/packages/python-bdfr/
-- Latest Development Build: https://aur.archlinux.org/packages/python-bdfr-git/
+- Latest Release: <https://aur.archlinux.org/packages/python-bdfr/>
+- Latest Development Build: <https://aur.archlinux.org/packages/python-bdfr-git/>
 
 ### Source code
+
 If you want to use the source code or make contributions, refer to [CONTRIBUTING](docs/CONTRIBUTING.md#preparing-the-environment-for-development)
 
 ## Usage
@@ -36,42 +48,47 @@ Note that the `clone` command is not a true, faithful clone of Reddit.
It simpl
 After installation, run the program from any directory as shown below:
 
 ```bash
-python3 -m bdfr download
+bdfr download
 ```
 
 ```bash
-python3 -m bdfr archive
+bdfr archive
 ```
 
 ```bash
-python3 -m bdfr clone
+bdfr clone
 ```
 
 However, these commands are not enough. You should chain parameters in [Options](#options) according to your use case. Don't forget that some parameters can be provided multiple times. Some quick reference commands are:
 
 ```bash
-python3 -m bdfr download ./path/to/output --subreddit Python -L 10
+bdfr download ./path/to/output --subreddit Python -L 10
 ```
+
 ```bash
-python3 -m bdfr download ./path/to/output --user reddituser --submitted -L 100
+bdfr download ./path/to/output --user reddituser --submitted -L 100
 ```
+
 ```bash
-python3 -m bdfr download ./path/to/output --user reddituser --submitted --all-comments --comment-context
+bdfr download ./path/to/output --user me --saved --authenticate -L 25 --file-scheme '{POSTID}'
 ```
+
 ```bash
-python3 -m bdfr download ./path/to/output --user me --saved --authenticate -L 25 --file-scheme '{POSTID}'
+bdfr download ./path/to/output --subreddit 'Python, all, mindustry' -L 10 --make-hard-links
 ```
+
 ```bash
-python3 -m bdfr download ./path/to/output --subreddit 'Python, all, mindustry' -L 10 --make-hard-links
+bdfr archive ./path/to/output --user reddituser --submitted --all-comments --comment-context
 ```
+
 ```bash
-python3 -m bdfr archive ./path/to/output --subreddit all --format yaml -L 500 --folder-scheme ''
+bdfr archive ./path/to/output --subreddit all --format yaml -L 500 --folder-scheme ''
 ```
 
 Alternatively, you can pass options through a YAML file.
 
 ```bash
-python3 -m bdfr download ./path/to/output --opts my_opts.yaml
+bdfr download ./path/to/output --opts my_opts.yaml
 ```
 
 For example, running it with the following file
 
 ```yml
 limit: 10
 file_scheme: '{UPVOTES}_{REDDITOR}_{POSTID}_{DATE}'
 skip: [mp4, avi]
 sort: top
 subreddit:
   - EarthPorn
   - CityPorn
 ```
 
 would be equivalent to (take note that in YAML there is `file_scheme` instead of `file-scheme`):
+
 ```bash
-python3 -m bdfr download ./path/to/output --skip mp4 --skip avi --file-scheme "{UPVOTES}_{REDDITOR}_{POSTID}_{DATE}" -L 10 -S top --subreddit EarthPorn --subreddit CityPorn
+bdfr download ./path/to/output --skip mp4 --skip avi --file-scheme "{UPVOTES}_{REDDITOR}_{POSTID}_{DATE}" -L 10 -S top --subreddit EarthPorn --subreddit CityPorn
 ```
 
 When the same option is specified both in the YAML file and as a command line argument, the command line argument takes precedence.
@@ -156,8 +174,8 @@ The following options are common between both the `archive` and `download` comma
 - `-m, --multireddit`
   - This is the name of a multireddit to add as a source
   - Can be specified multiple times
-    - This can be done by using `-m` multiple times
-    - Multireddits can also be used to provide CSV multireddits e.g. `-m 'chess, favourites'`
+    - This can be done by using `-m` multiple times
+    - Multireddits can also be used to provide CSV multireddits e.g. `-m 'chess, favourites'`
   - The specified multireddits must all belong to the user specified with the `--user` option
 - `-s, --subreddit`
   - This adds a subreddit as a source
@@ -237,7 +255,6 @@ The following options apply only to the `download` command. This command downloa
 - `--max-score-ratio`
   - This skips all submissions which have a higher than specified upvote ratio
-
 ### Archiver Options
 
 The following options are for the `archive` command specifically.
@@ -266,7 +283,7 @@ Alternatively, you can use the command-line [xargs](https://en.wikipedia.org/wik
 For a list of users `users.txt` (one user per line), type:
 
 ```bash
-cat users.txt | xargs -L 1 echo --user | xargs -L 50 python3 -m bdfr download
+cat users.txt | xargs -L 1 echo --user | xargs -L 50 bdfr download
 ```
 
 The part `-L 50` is to make sure that the character limit for a single line isn't exceeded, but may not be necessary. This can also be used to load subreddits from a file, simply exchange `--user` with `--subreddit` and so on.
@@ -291,18 +308,18 @@ For more details on the configuration file and the values therein, see [Configur
 The naming and folder schemes for the BDFR are both completely customisable. A number of different fields can be given which will be replaced with properties from a submission when downloading it. The scheme format takes the form of `{KEY}`, where `KEY` is a string from the below list.
 
-  - `DATE`
-  - `FLAIR`
-  - `POSTID`
-  - `REDDITOR`
-  - `SUBREDDIT`
-  - `TITLE`
-  - `UPVOTES`
+- `DATE`
+- `FLAIR`
+- `POSTID`
+- `REDDITOR`
+- `SUBREDDIT`
+- `TITLE`
+- `UPVOTES`
 
 Each of these can be enclosed in curly brackets, `{}`, and included in the name. For example, to just title every downloaded post with the unique submission ID, you can use `{POSTID}`. Static strings can also be included, such as `download_{POSTID}` which will not change from submission to submission. For example, the previous string will result in the following submission file names:
 
-  - `download_aaaaaa.png`
-  - `download_bbbbbb.png`
+- `download_aaaaaa.png`
+- `download_bbbbbb.png`
 
 At least one key *must* be included in the file scheme, otherwise an error will be thrown. The folder scheme, however, can be null or a simple static string. In the former case, all files will be placed in the folder specified with the `directory` argument. If the folder scheme is a static string, then all submissions will be placed in a folder of that name. In both cases, there will be no separation between all submissions.
 
 It is highly recommended that the file name scheme contain the parameter `{POSTID}`.
@@ -312,19 +329,19 @@ The configuration files are, by default, stored in the configuration directory for the user. This differs depending on the OS that the BDFR is being run on. For Windows, this will be:
 
-  - `C:\Users\<username>\AppData\Local\BDFR\bdfr`
+- `C:\Users\<username>\AppData\Local\BDFR\bdfr`
 
 If Python has been installed through the Windows Store, the folder will appear in a different place. Note that the hash included in the file path may change from installation to installation.
 
-  - `C:\Users\<username>\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\Local\BDFR\bdfr`
+- `C:\Users\<username>\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\Local\BDFR\bdfr`
 
 On Mac OSX, this will be:
 
-  - `~/Library/Application Support/bdfr`.
-
+- `~/Library/Application Support/bdfr`.
+
 Lastly, on a Linux system, this will be:
 
-  - `~/.config/bdfr/`
+- `~/.config/bdfr/`
 
 The logging output for each run of the BDFR will be saved to this directory in the file `log_output.txt`. If you need to submit a bug, it is this file that you will need to submit with the report.
 
@@ -332,16 +349,16 @@
 The `config.cfg` is the file that supplies the BDFR with the configuration to use. At the moment, the following keys **must** be included in the configuration file supplied.
-  - `client_id`
-  - `client_secret`
-  - `scopes`
+- `client_id`
+- `client_secret`
+- `scopes`
 
 The following keys are optional, and defaults will be used if they cannot be found.
 
-  - `backup_log_count`
-  - `max_wait_time`
-  - `time_format`
-  - `disabled_modules`
+- `backup_log_count`
+- `max_wait_time`
+- `time_format`
+- `disabled_modules`
 
 None of these should be modified unless you know what you're doing, as the default values will enable the BDFR to function just fine. A configuration is included in the BDFR when it is installed, and this will be placed in the configuration directory as the default.
 
@@ -360,12 +377,16 @@ The individual modules of the BDFR, used to download submissions from websites, 
 Modules can be disabled through the command line interface for the BDFR or more permanently in the configuration file via the `disabled_modules` option. The list of downloaders that can be disabled is the following. Note that they are case-insensitive.
 
 - `Direct`
+- `DelayForReddit`
 - `Erome`
 - `Gallery` (Reddit Image Galleries)
 - `Gfycat`
 - `Imgur`
+- `PornHub`
 - `Redgifs`
 - `SelfPost` (Reddit Text Post)
+- `Vidble`
+- `VReddit` (Reddit Video Post)
 - `Youtube`
 - `YoutubeDlFallback`
 
@@ -393,17 +414,19 @@ The logfiles that the BDFR outputs are consistent and quite detailed and in a fo
 ## List of currently supported sources
 
-  - Direct links (links leading to a file)
-  - Erome
-  - Gfycat
-  - Gif Delivery Network
-  - Imgur
-  - Reddit Galleries
-  - Reddit Text Posts
-  - Reddit Videos
-  - Redgifs
-  - YouTube
-  - Streamable
+- Direct links (links leading to a file)
+- Delay for Reddit
+- Erome
+- Gfycat
+- Gif Delivery Network
+- Imgur
+- Reddit Galleries
+- Reddit Text Posts
+- Reddit Videos
+- Redgifs
+- Vidble
+- YouTube
+  - Any source supported by [YT-DLP](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md) should be compatible
 
 ## Contributing
 
diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md
index e08f1c2..89aa003 100644
--- a/docs/CONTRIBUTING.md
+++ b/docs/CONTRIBUTING.md
@@ -51,7 +51,7 @@ $ python3 -m pip install -e .
 Then, you can run the program from anywhere on your disk as follows:
 
 ```bash
-$ python3 -m bdfr
+bdfr
 ```
 
 ## Style Guide
diff --git a/setup.cfg b/setup.cfg
index 67a1deb..953588a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -4,7 +4,7 @@ description_file = README.md
 description_content_type = text/markdown
 home_page = https://github.com/aliparlakci/bulk-downloader-for-reddit
 keywords = reddit, download, archive
-version = 2.6.0
+version = 2.6.2
 author = Ali Parlakci
 author_email = parlakciali@gmail.com
 maintainer = Serene Arc
@@ -20,3 +20,7 @@ platforms = any
 
 [files]
 packages = bdfr
+
+[entry_points]
+console_scripts =
+    bdfr = bdfr.__main__:cli

From 7b7167643f74ab17744cae31c015a5f870ecec60 Mon Sep 17 00:00:00 2001
From: OMEGA_RAZER <869111+OMEGARAZER@users.noreply.github.com>
Date: Wed, 30 Nov 2022 14:20:51 -0500
Subject: [PATCH 17/76] Update README.md

Get rid of the double with.
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 5aae6f6..35159c2 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@
python3 -m pip install bdfr --upgrade ``` -or with [pipx](https://pypa.github.io/pipx) with: +or via [pipx](https://pypa.github.io/pipx) with: ```bash python3 -m pipx install bdfr --upgrade From 831f49daa65afa5a0485426d2a090b8354275ac6 Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Wed, 30 Nov 2022 17:19:02 -0500 Subject: [PATCH 18/76] Refurb linting Lint with [refurb](https://github.com/dosisod/refurb) using `--disable 126 --python-version 3.9` --- bdfr/configuration.py | 2 +- bdfr/connector.py | 8 ++++---- bdfr/download_filter.py | 4 ++-- bdfr/downloader.py | 4 ++-- bdfr/file_name_formatter.py | 2 +- bdfr/site_downloaders/youtube.py | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/bdfr/configuration.py b/bdfr/configuration.py index 46c4cf0..c15e429 100644 --- a/bdfr/configuration.py +++ b/bdfr/configuration.py @@ -75,7 +75,7 @@ class Configuration(Namespace): if not yaml_file_loc.exists(): logger.error(f'No YAML file found at {yaml_file_loc}') return - with open(yaml_file_loc) as file: + with yaml_file_loc.open() as file: try: opts = yaml.load(file, Loader=yaml.FullLoader) except yaml.YAMLError as e: diff --git a/bdfr/connector.py b/bdfr/connector.py index 9aa2a6e..3b359e8 100644 --- a/bdfr/connector.py +++ b/bdfr/connector.py @@ -91,7 +91,7 @@ class RedditConnector(metaclass=ABCMeta): logger.log(9, 'Created site authenticator') self.args.skip_subreddit = self.split_args_input(self.args.skip_subreddit) - self.args.skip_subreddit = set([sub.lower() for sub in self.args.skip_subreddit]) + self.args.skip_subreddit = {sub.lower() for sub in self.args.skip_subreddit} def read_config(self): """Read any cfg values that need to be processed""" @@ -113,7 +113,7 @@ class RedditConnector(metaclass=ABCMeta): def parse_disabled_modules(self): disabled_modules = self.args.disable_module disabled_modules = self.split_args_input(disabled_modules) - disabled_modules = set([name.strip().lower() for name in disabled_modules]) + disabled_modules = {name.strip().lower() for name in disabled_modules} self.args.disable_module = disabled_modules logger.debug(f'Disabling the following modules: {", ".join(self.args.disable_module)}') @@ -249,7 +249,7 @@ class RedditConnector(metaclass=ABCMeta): if self.args.authenticate: try: subscribed_subreddits = list(self.reddit_instance.user.subreddits(limit=None)) - subscribed_subreddits = set([s.display_name for s in subscribed_subreddits]) + subscribed_subreddits = {s.display_name for s in subscribed_subreddits} except prawcore.InsufficientScope: logger.error('BDFR has insufficient scope to access subreddit lists') else: @@ -428,7 +428,7 @@ class RedditConnector(metaclass=ABCMeta): if not id_file.exists(): logger.warning(f'ID file at {id_file} does not exist') continue - with open(id_file, 'r') as file: + with id_file.open('r') as file: for line in file: out.append(line.strip()) return set(out) diff --git a/bdfr/download_filter.py b/bdfr/download_filter.py index 3bbbdec..28053be 100644 --- a/bdfr/download_filter.py +++ b/bdfr/download_filter.py @@ -36,7 +36,7 @@ class DownloadFilter: combined_extensions = '|'.join(self.excluded_extensions) pattern = re.compile(r'.*({})$'.format(combined_extensions)) if re.match(pattern, resource_extension): - logger.log(9, f'Url "{resource_extension}" matched with "{str(pattern)}"') + logger.log(9, f'Url "{resource_extension}" matched with "{pattern}"') return False else: return True @@ -47,7 +47,7 @@ class DownloadFilter: combined_domains = 
'|'.join(self.excluded_domains)
         pattern = re.compile(r'https?://.*({}).*'.format(combined_domains))
         if re.match(pattern, url):
-            logger.log(9, f'Url "{url}" matched with "{str(pattern)}"')
+            logger.log(9, f'Url "{url}" matched with "{pattern}"')
             return False
         else:
             return True
diff --git a/bdfr/downloader.py b/bdfr/downloader.py
index 057f6af..6f26937 100644
--- a/bdfr/downloader.py
+++ b/bdfr/downloader.py
@@ -25,7 +25,7 @@ logger = logging.getLogger(__name__)
 def _calc_hash(existing_file: Path):
     chunk_size = 1024 * 1024
     md5_hash = hashlib.md5()
-    with open(existing_file, 'rb') as file:
+    with existing_file.open('rb') as file:
         chunk = file.read(chunk_size)
         while chunk:
             md5_hash.update(chunk)
@@ -127,7 +127,7 @@ class RedditDownloader(RedditConnector):
                     f' in submission {submission.id}')
                 return
             try:
-                with open(destination, 'wb') as file:
+                with destination.open('wb') as file:
                     file.write(res.content)
                 logger.debug(f'Written file to {destination}')
             except OSError as e:
diff --git a/bdfr/file_name_formatter.py b/bdfr/file_name_formatter.py
index 1dabd34..70bd527 100644
--- a/bdfr/file_name_formatter.py
+++ b/bdfr/file_name_formatter.py
@@ -107,7 +107,7 @@ class FileNameFormatter:
             destination_directory,
             *[self._format_name(resource.source_submission, part) for part in self.directory_format_string],
         )
-        index = f'_{str(index)}' if index else ''
+        index = f'_{index}' if index else ''
         if not resource.extension:
             raise BulkDownloaderException(f'Resource from {resource.url} has no extension')
         file_name = str(self._format_name(resource.source_submission, self.file_format_string))
diff --git a/bdfr/site_downloaders/youtube.py b/bdfr/site_downloaders/youtube.py
index 70c35ae..315fd0a 100644
--- a/bdfr/site_downloaders/youtube.py
+++ b/bdfr/site_downloaders/youtube.py
@@ -48,11 +48,11 @@ class Youtube(BaseDownloader):
                 raise SiteDownloaderError(f'Youtube download failed: {e}')
 
             downloaded_files = list(download_path.iterdir())
-            if len(downloaded_files) > 0:
+            if downloaded_files:
                 downloaded_file = downloaded_files[0]
             else:
                 raise NotADownloadableLinkError(f"No media exists in the URL {self.post.url}")
-            with open(downloaded_file, 'rb') as file:
+            with downloaded_file.open('rb') as file:
                 content = file.read()
             return content
         return download

From ef7fcce1cc27fc6a85279e576d7f4d32aa66b1c7 Mon Sep 17 00:00:00 2001
From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com>
Date: Wed, 30 Nov 2022 18:05:10 -0500
Subject: [PATCH 19/76] lint tests

Lint with [refurb](https://github.com/dosisod/refurb) using `--disable 126 --python-version 3.9`

Also update bats to 1.8.2 and bats-assert to 2.1.0.

No changes to the tests, all still passing.
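For anyone reproducing this lint run locally, a minimal sketch, assuming refurb is installed from PyPI and reusing the exact flags quoted above, would be:

```bash
# install the linter into the development environment
python3 -m pip install refurb
# lint the test suite with check 126 disabled, targeting the minimum supported Python
refurb tests --disable 126 --python-version 3.9
```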
--- pytest.ini | 2 +- scripts/tests/bats | 2 +- scripts/tests/test_helper/bats-assert | 2 +- tests/test_connector.py | 6 +++--- tests/test_downloader.py | 2 +- tests/test_oauth2.py | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pytest.ini b/pytest.ini index 5123ee6..09df53c 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,7 +1,7 @@ [pytest] +addopts = --strict-markers markers = online: tests require a connection to the internet reddit: tests require a connection to Reddit slow: test is slow to run authenticated: test requires an authenticated Reddit instance - diff --git a/scripts/tests/bats b/scripts/tests/bats index ce5ca28..e8c840b 160000 --- a/scripts/tests/bats +++ b/scripts/tests/bats @@ -1 +1 @@ -Subproject commit ce5ca2802fabe5dc38393240cd40e20f8928d3b0 +Subproject commit e8c840b58f0833e23461c682655fe540aa923f85 diff --git a/scripts/tests/test_helper/bats-assert b/scripts/tests/test_helper/bats-assert index e0de84e..78fa631 160000 --- a/scripts/tests/test_helper/bats-assert +++ b/scripts/tests/test_helper/bats-assert @@ -1 +1 @@ -Subproject commit e0de84e9c011223e7f88b7ccf1c929f4327097ba +Subproject commit 78fa631d1370562d2cd4a1390989e706158e7bf0 diff --git a/tests/test_connector.py b/tests/test_connector.py index 2cddcdf..4c9e52d 100644 --- a/tests/test_connector.py +++ b/tests/test_connector.py @@ -47,7 +47,7 @@ def assert_all_results_are_submissions(result_limit: int, results: list[Iterator def assert_all_results_are_submissions_or_comments(result_limit: int, results: list[Iterator]) -> list: results = [sub for res in results for sub in res] - assert all([isinstance(res, praw.models.Submission) or isinstance(res, praw.models.Comment) for res in results]) + assert all([isinstance(res, (praw.models.Submission, praw.models.Comment)) for res in results]) assert not any([isinstance(m, MagicMock) for m in results]) if result_limit is not None: assert len(results) == result_limit @@ -259,7 +259,7 @@ def test_get_subreddit_search( assert all([res.subreddit.display_name in test_subreddits for res in results]) assert len(results) <= max_expected_len if max_expected_len != 0: - assert len(results) > 0 + assert results assert not any([isinstance(m, MagicMock) for m in results]) @@ -356,7 +356,7 @@ def test_get_subscribed_subreddits(downloader_mock: MagicMock, authenticated_red downloader_mock.sort_filter = RedditTypes.SortType.HOT results = RedditConnector.get_subreddits(downloader_mock) assert all([isinstance(s, praw.models.ListingGenerator) for s in results]) - assert len(results) > 0 + assert results @pytest.mark.parametrize(('test_name', 'expected'), ( diff --git a/tests/test_downloader.py b/tests/test_downloader.py index e2e9e82..e92d870 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -152,7 +152,7 @@ def test_download_submission_hash_exists( RedditDownloader._download_submission(downloader_mock, submission) folder_contents = list(tmp_path.iterdir()) output = capsys.readouterr() - assert len(folder_contents) == 0 + assert not folder_contents assert re.search(r'Resource hash .*? 
downloaded elsewhere', output.out) diff --git a/tests/test_oauth2.py b/tests/test_oauth2.py index 6c25d35..71bdca1 100644 --- a/tests/test_oauth2.py +++ b/tests/test_oauth2.py @@ -66,6 +66,6 @@ def test_token_manager_write(example_config: configparser.ConfigParser, tmp_path test_manager = OAuth2TokenManager(example_config, test_path) test_manager.post_refresh_callback(mock_authoriser) assert example_config.get('DEFAULT', 'user_token') == 'changed_token' - with open(test_path, 'r') as file: + with test_path.open('r') as file: file_contents = file.read() assert 'user_token = changed_token' in file_contents From 69fa1f3f090535f14d143be3d845da4c0e62bc00 Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Wed, 30 Nov 2022 21:12:03 -0500 Subject: [PATCH 20/76] Pylance typing Fix Pylance warnings for typing --- bdfr/archive_entry/base_archive_entry.py | 2 +- bdfr/archiver.py | 4 ++-- bdfr/file_name_formatter.py | 2 +- tests/test_file_name_formatter.py | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/bdfr/archive_entry/base_archive_entry.py b/bdfr/archive_entry/base_archive_entry.py index a33381e..fceee33 100644 --- a/bdfr/archive_entry/base_archive_entry.py +++ b/bdfr/archive_entry/base_archive_entry.py @@ -7,7 +7,7 @@ from praw.models import Comment, Submission class BaseArchiveEntry(ABC): - def __init__(self, source: (Comment, Submission)): + def __init__(self, source: Comment | Submission): self.source = source self.post_details: dict = {} diff --git a/bdfr/archiver.py b/bdfr/archiver.py index 4bd24f5..1fb3ee2 100644 --- a/bdfr/archiver.py +++ b/bdfr/archiver.py @@ -65,7 +65,7 @@ class Archiver(RedditConnector): return results @staticmethod - def _pull_lever_entry_factory(praw_item: (praw.models.Submission, praw.models.Comment)) -> BaseArchiveEntry: + def _pull_lever_entry_factory(praw_item: praw.models.Submission | praw.models.Comment) -> BaseArchiveEntry: if isinstance(praw_item, praw.models.Submission): return SubmissionArchiveEntry(praw_item) elif isinstance(praw_item, praw.models.Comment): @@ -73,7 +73,7 @@ class Archiver(RedditConnector): else: raise ArchiverError(f'Factory failed to classify item of type {type(praw_item).__name__}') - def write_entry(self, praw_item: (praw.models.Submission, praw.models.Comment)): + def write_entry(self, praw_item: praw.models.Submission | praw.models.Comment): if self.args.comment_context and isinstance(praw_item, praw.models.Comment): logger.debug(f'Converting comment {praw_item.id} to submission {praw_item.submission.id}') praw_item = praw_item.submission diff --git a/bdfr/file_name_formatter.py b/bdfr/file_name_formatter.py index 70bd527..b20ea6f 100644 --- a/bdfr/file_name_formatter.py +++ b/bdfr/file_name_formatter.py @@ -34,7 +34,7 @@ class FileNameFormatter: self.directory_format_string: list[str] = directory_format_string.split('/') self.time_format_string = time_format_string - def _format_name(self, submission: (Comment, Submission), format_string: str) -> str: + def _format_name(self, submission: Comment | Submission, format_string: str) -> str: if isinstance(submission, Submission): attributes = self._generate_name_dict_from_submission(submission) elif isinstance(submission, Comment): diff --git a/tests/test_file_name_formatter.py b/tests/test_file_name_formatter.py index e7f1ebe..993718c 100644 --- a/tests/test_file_name_formatter.py +++ b/tests/test_file_name_formatter.py @@ -6,7 +6,7 @@ import sys import unittest.mock from datetime import datetime from pathlib import Path 
-from typing import Optional +from typing import Optional, Type from unittest.mock import MagicMock import praw.models @@ -33,7 +33,7 @@ def submission() -> MagicMock: return test -def do_test_string_equality(result: [Path, str], expected: str) -> bool: +def do_test_string_equality(result: Path | str, expected: str) -> bool: if platform.system() == 'Windows': expected = FileNameFormatter._format_for_windows(expected) return str(result).endswith(expected) @@ -411,7 +411,7 @@ def test_windows_max_path(tmp_path: Path): )) def test_name_submission( test_reddit_id: str, - test_downloader: type(BaseDownloader), + test_downloader: Type[BaseDownloader], expected_names: set[str], reddit_instance: praw.reddit.Reddit, ): From 45429be27c10dba097d4e202722c14c39f900a5a Mon Sep 17 00:00:00 2001 From: Serene <33189705+Serene-Arc@users.noreply.github.com> Date: Thu, 1 Dec 2022 12:37:03 +1000 Subject: [PATCH 21/76] Revert "Pylance typing" --- bdfr/archive_entry/base_archive_entry.py | 2 +- bdfr/archiver.py | 4 ++-- bdfr/file_name_formatter.py | 2 +- tests/test_file_name_formatter.py | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/bdfr/archive_entry/base_archive_entry.py b/bdfr/archive_entry/base_archive_entry.py index fceee33..a33381e 100644 --- a/bdfr/archive_entry/base_archive_entry.py +++ b/bdfr/archive_entry/base_archive_entry.py @@ -7,7 +7,7 @@ from praw.models import Comment, Submission class BaseArchiveEntry(ABC): - def __init__(self, source: Comment | Submission): + def __init__(self, source: (Comment, Submission)): self.source = source self.post_details: dict = {} diff --git a/bdfr/archiver.py b/bdfr/archiver.py index 1fb3ee2..4bd24f5 100644 --- a/bdfr/archiver.py +++ b/bdfr/archiver.py @@ -65,7 +65,7 @@ class Archiver(RedditConnector): return results @staticmethod - def _pull_lever_entry_factory(praw_item: praw.models.Submission | praw.models.Comment) -> BaseArchiveEntry: + def _pull_lever_entry_factory(praw_item: (praw.models.Submission, praw.models.Comment)) -> BaseArchiveEntry: if isinstance(praw_item, praw.models.Submission): return SubmissionArchiveEntry(praw_item) elif isinstance(praw_item, praw.models.Comment): @@ -73,7 +73,7 @@ class Archiver(RedditConnector): else: raise ArchiverError(f'Factory failed to classify item of type {type(praw_item).__name__}') - def write_entry(self, praw_item: praw.models.Submission | praw.models.Comment): + def write_entry(self, praw_item: (praw.models.Submission, praw.models.Comment)): if self.args.comment_context and isinstance(praw_item, praw.models.Comment): logger.debug(f'Converting comment {praw_item.id} to submission {praw_item.submission.id}') praw_item = praw_item.submission diff --git a/bdfr/file_name_formatter.py b/bdfr/file_name_formatter.py index b20ea6f..70bd527 100644 --- a/bdfr/file_name_formatter.py +++ b/bdfr/file_name_formatter.py @@ -34,7 +34,7 @@ class FileNameFormatter: self.directory_format_string: list[str] = directory_format_string.split('/') self.time_format_string = time_format_string - def _format_name(self, submission: Comment | Submission, format_string: str) -> str: + def _format_name(self, submission: (Comment, Submission), format_string: str) -> str: if isinstance(submission, Submission): attributes = self._generate_name_dict_from_submission(submission) elif isinstance(submission, Comment): diff --git a/tests/test_file_name_formatter.py b/tests/test_file_name_formatter.py index 993718c..e7f1ebe 100644 --- a/tests/test_file_name_formatter.py +++ b/tests/test_file_name_formatter.py @@ -6,7 +6,7 @@ import 
sys import unittest.mock from datetime import datetime from pathlib import Path -from typing import Optional, Type +from typing import Optional from unittest.mock import MagicMock import praw.models @@ -33,7 +33,7 @@ def submission() -> MagicMock: return test -def do_test_string_equality(result: Path | str, expected: str) -> bool: +def do_test_string_equality(result: [Path, str], expected: str) -> bool: if platform.system() == 'Windows': expected = FileNameFormatter._format_for_windows(expected) return str(result).endswith(expected) @@ -411,7 +411,7 @@ def test_windows_max_path(tmp_path: Path): )) def test_name_submission( test_reddit_id: str, - test_downloader: Type[BaseDownloader], + test_downloader: type(BaseDownloader), expected_names: set[str], reddit_instance: praw.reddit.Reddit, ): From b30ced9be958da5483427b0f40f8473cdcdc91ff Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Wed, 30 Nov 2022 21:48:10 -0500 Subject: [PATCH 22/76] Redo Pylance typing changes --- bdfr/archive_entry/base_archive_entry.py | 3 ++- bdfr/archiver.py | 6 +++--- bdfr/file_name_formatter.py | 4 ++-- tests/test_file_name_formatter.py | 6 +++--- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/bdfr/archive_entry/base_archive_entry.py b/bdfr/archive_entry/base_archive_entry.py index a33381e..57e36f8 100644 --- a/bdfr/archive_entry/base_archive_entry.py +++ b/bdfr/archive_entry/base_archive_entry.py @@ -2,12 +2,13 @@ # coding=utf-8 from abc import ABC, abstractmethod +from typing import Union from praw.models import Comment, Submission class BaseArchiveEntry(ABC): - def __init__(self, source: (Comment, Submission)): + def __init__(self, source: Union[Comment, Submission]): self.source = source self.post_details: dict = {} diff --git a/bdfr/archiver.py b/bdfr/archiver.py index 4bd24f5..809af96 100644 --- a/bdfr/archiver.py +++ b/bdfr/archiver.py @@ -4,7 +4,7 @@ import json import logging import re -from typing import Iterator +from typing import Iterator, Union import dict2xml import praw.models @@ -65,7 +65,7 @@ class Archiver(RedditConnector): return results @staticmethod - def _pull_lever_entry_factory(praw_item: (praw.models.Submission, praw.models.Comment)) -> BaseArchiveEntry: + def _pull_lever_entry_factory(praw_item: Union[praw.models.Submission, praw.models.Comment]) -> BaseArchiveEntry: if isinstance(praw_item, praw.models.Submission): return SubmissionArchiveEntry(praw_item) elif isinstance(praw_item, praw.models.Comment): @@ -73,7 +73,7 @@ class Archiver(RedditConnector): else: raise ArchiverError(f'Factory failed to classify item of type {type(praw_item).__name__}') - def write_entry(self, praw_item: (praw.models.Submission, praw.models.Comment)): + def write_entry(self, praw_item: Union[praw.models.Submission, praw.models.Comment]): if self.args.comment_context and isinstance(praw_item, praw.models.Comment): logger.debug(f'Converting comment {praw_item.id} to submission {praw_item.submission.id}') praw_item = praw_item.submission diff --git a/bdfr/file_name_formatter.py b/bdfr/file_name_formatter.py index 70bd527..4a039c9 100644 --- a/bdfr/file_name_formatter.py +++ b/bdfr/file_name_formatter.py @@ -6,7 +6,7 @@ import platform import re import subprocess from pathlib import Path -from typing import Optional +from typing import Optional, Union from praw.models import Comment, Submission @@ -34,7 +34,7 @@ class FileNameFormatter: self.directory_format_string: list[str] = directory_format_string.split('/') self.time_format_string = 
time_format_string - def _format_name(self, submission: (Comment, Submission), format_string: str) -> str: + def _format_name(self, submission: Union[Comment, Submission], format_string: str) -> str: if isinstance(submission, Submission): attributes = self._generate_name_dict_from_submission(submission) elif isinstance(submission, Comment): diff --git a/tests/test_file_name_formatter.py b/tests/test_file_name_formatter.py index e7f1ebe..0492536 100644 --- a/tests/test_file_name_formatter.py +++ b/tests/test_file_name_formatter.py @@ -6,7 +6,7 @@ import sys import unittest.mock from datetime import datetime from pathlib import Path -from typing import Optional +from typing import Optional, Type, Union from unittest.mock import MagicMock import praw.models @@ -33,7 +33,7 @@ def submission() -> MagicMock: return test -def do_test_string_equality(result: [Path, str], expected: str) -> bool: +def do_test_string_equality(result: Union[Path, str], expected: str) -> bool: if platform.system() == 'Windows': expected = FileNameFormatter._format_for_windows(expected) return str(result).endswith(expected) @@ -411,7 +411,7 @@ def test_windows_max_path(tmp_path: Path): )) def test_name_submission( test_reddit_id: str, - test_downloader: type(BaseDownloader), + test_downloader: Type[BaseDownloader], expected_names: set[str], reddit_instance: praw.reddit.Reddit, ): From 2524070bd02b1111da4225f5ad46ad5a0394c65a Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 3 Dec 2022 15:04:02 +1000 Subject: [PATCH 23/76] Add tox configuration for formatting --- tox.ini | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 tox.ini diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..e5dce99 --- /dev/null +++ b/tox.ini @@ -0,0 +1,16 @@ +[tox] +envlist = + format + +[testenv:format] +deps = + isort + black +skip_install = True +commands = + isort bdfr tests + black bdfr tests --line-length 120 + +[isort] +profile = black +multi_line_output = 3 \ No newline at end of file From 3136a6488c5c26968999206f66d621543d97fc18 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 3 Dec 2022 15:06:05 +1000 Subject: [PATCH 24/76] Add tox to dev requirements --- dev_requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/dev_requirements.txt b/dev_requirements.txt index e079f8a..5bbae8f 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1 +1,2 @@ pytest +tox From 51b09a77edb182828ce2116560b689ae7bfc26a7 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 3 Dec 2022 15:06:52 +1000 Subject: [PATCH 25/76] Change filename to conform to standard --- dev_requirements.txt => dev-requirements.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename dev_requirements.txt => dev-requirements.txt (100%) diff --git a/dev_requirements.txt b/dev-requirements.txt similarity index 100% rename from dev_requirements.txt rename to dev-requirements.txt From 96cd7d714735a42b0d14e9119489cdc5bf5bd541 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 3 Dec 2022 15:07:28 +1000 Subject: [PATCH 26/76] Add formatting tools to dev requirements --- dev-requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dev-requirements.txt b/dev-requirements.txt index 5bbae8f..af48d1b 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,2 +1,4 @@ +black +isort pytest tox From 0873a4a2b2d307676d1d91f53fe0c68ce5fe41a1 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 3 Dec 2022 15:11:17 +1000 Subject: [PATCH 27/76] Format according to the black standard --- bdfr/__main__.py | 112 
++-- bdfr/archive_entry/base_archive_entry.py | 28 +- bdfr/archive_entry/comment_archive_entry.py | 2 +- .../archive_entry/submission_archive_entry.py | 38 +- bdfr/archiver.py | 49 +- bdfr/cloner.py | 2 +- bdfr/configuration.py | 27 +- bdfr/connector.py | 182 ++++--- bdfr/download_filter.py | 8 +- bdfr/downloader.py | 70 +-- bdfr/exceptions.py | 1 + bdfr/file_name_formatter.py | 135 ++--- bdfr/oauth2.py | 59 +- bdfr/resource.py | 21 +- bdfr/site_downloaders/base_downloader.py | 4 +- bdfr/site_downloaders/delay_for_reddit.py | 2 +- bdfr/site_downloaders/direct.py | 2 +- bdfr/site_downloaders/download_factory.py | 59 +- bdfr/site_downloaders/erome.py | 24 +- .../fallback_downloader.py | 1 - .../fallback_downloaders/ytdlp_fallback.py | 6 +- bdfr/site_downloaders/gallery.py | 20 +- bdfr/site_downloaders/gfycat.py | 20 +- bdfr/site_downloaders/imgur.py | 47 +- bdfr/site_downloaders/pornhub.py | 6 +- bdfr/site_downloaders/redgifs.py | 49 +- bdfr/site_downloaders/self_post.py | 38 +- bdfr/site_downloaders/vidble.py | 24 +- bdfr/site_downloaders/vreddit.py | 10 +- bdfr/site_downloaders/youtube.py | 35 +- scripts/tests/bats | 2 +- scripts/tests/test_helper/bats-assert | 2 +- .../test_comment_archive_entry.py | 39 +- .../test_submission_archive_entry.py | 40 +- tests/conftest.py | 22 +- .../test_archive_integration.py | 138 ++--- .../test_clone_integration.py | 52 +- .../test_download_integration.py | 339 ++++++------ .../test_ytdlp_fallback.py | 39 +- .../site_downloaders/test_delay_for_reddit.py | 11 +- tests/site_downloaders/test_direct.py | 11 +- .../site_downloaders/test_download_factory.py | 107 ++-- tests/site_downloaders/test_erome.py | 45 +- tests/site_downloaders/test_gallery.py | 135 +++-- tests/site_downloaders/test_gfycat.py | 22 +- tests/site_downloaders/test_imgur.py | 257 ++++----- tests/site_downloaders/test_pornhub.py | 11 +- tests/site_downloaders/test_redgifs.py | 106 ++-- tests/site_downloaders/test_self_post.py | 13 +- tests/site_downloaders/test_vidble.py | 102 ++-- tests/site_downloaders/test_vreddit.py | 18 +- tests/site_downloaders/test_youtube.py | 22 +- tests/test_archiver.py | 15 +- tests/test_configuration.py | 23 +- tests/test_connector.py | 405 ++++++++------ tests/test_download_filter.py | 74 +-- tests/test_downloader.py | 207 ++++--- tests/test_file_name_formatter.py | 512 ++++++++++-------- tests/test_oauth2.py | 66 ++- tests/test_resource.py | 34 +- 60 files changed, 2160 insertions(+), 1790 deletions(-) diff --git a/bdfr/__main__.py b/bdfr/__main__.py index 1117a70..c26f577 100644 --- a/bdfr/__main__.py +++ b/bdfr/__main__.py @@ -13,53 +13,54 @@ from bdfr.downloader import RedditDownloader logger = logging.getLogger() _common_options = [ - click.argument('directory', type=str), - click.option('--authenticate', is_flag=True, default=None), - click.option('--config', type=str, default=None), - click.option('--opts', type=str, default=None), - click.option('--disable-module', multiple=True, default=None, type=str), - click.option('--exclude-id', default=None, multiple=True), - click.option('--exclude-id-file', default=None, multiple=True), - click.option('--file-scheme', default=None, type=str), - click.option('--folder-scheme', default=None, type=str), - click.option('--ignore-user', type=str, multiple=True, default=None), - click.option('--include-id-file', multiple=True, default=None), - click.option('--log', type=str, default=None), - click.option('--saved', is_flag=True, default=None), - click.option('--search', default=None, type=str), - 
click.option('--submitted', is_flag=True, default=None), - click.option('--subscribed', is_flag=True, default=None), - click.option('--time-format', type=str, default=None), - click.option('--upvoted', is_flag=True, default=None), - click.option('-L', '--limit', default=None, type=int), - click.option('-l', '--link', multiple=True, default=None, type=str), - click.option('-m', '--multireddit', multiple=True, default=None, type=str), - click.option('-S', '--sort', type=click.Choice(('hot', 'top', 'new', 'controversial', 'rising', 'relevance')), - default=None), - click.option('-s', '--subreddit', multiple=True, default=None, type=str), - click.option('-t', '--time', type=click.Choice(('all', 'hour', 'day', 'week', 'month', 'year')), default=None), - click.option('-u', '--user', type=str, multiple=True, default=None), - click.option('-v', '--verbose', default=None, count=True), + click.argument("directory", type=str), + click.option("--authenticate", is_flag=True, default=None), + click.option("--config", type=str, default=None), + click.option("--opts", type=str, default=None), + click.option("--disable-module", multiple=True, default=None, type=str), + click.option("--exclude-id", default=None, multiple=True), + click.option("--exclude-id-file", default=None, multiple=True), + click.option("--file-scheme", default=None, type=str), + click.option("--folder-scheme", default=None, type=str), + click.option("--ignore-user", type=str, multiple=True, default=None), + click.option("--include-id-file", multiple=True, default=None), + click.option("--log", type=str, default=None), + click.option("--saved", is_flag=True, default=None), + click.option("--search", default=None, type=str), + click.option("--submitted", is_flag=True, default=None), + click.option("--subscribed", is_flag=True, default=None), + click.option("--time-format", type=str, default=None), + click.option("--upvoted", is_flag=True, default=None), + click.option("-L", "--limit", default=None, type=int), + click.option("-l", "--link", multiple=True, default=None, type=str), + click.option("-m", "--multireddit", multiple=True, default=None, type=str), + click.option( + "-S", "--sort", type=click.Choice(("hot", "top", "new", "controversial", "rising", "relevance")), default=None + ), + click.option("-s", "--subreddit", multiple=True, default=None, type=str), + click.option("-t", "--time", type=click.Choice(("all", "hour", "day", "week", "month", "year")), default=None), + click.option("-u", "--user", type=str, multiple=True, default=None), + click.option("-v", "--verbose", default=None, count=True), ] _downloader_options = [ - click.option('--make-hard-links', is_flag=True, default=None), - click.option('--max-wait-time', type=int, default=None), - click.option('--no-dupes', is_flag=True, default=None), - click.option('--search-existing', is_flag=True, default=None), - click.option('--skip', default=None, multiple=True), - click.option('--skip-domain', default=None, multiple=True), - click.option('--skip-subreddit', default=None, multiple=True), - click.option('--min-score', type=int, default=None), - click.option('--max-score', type=int, default=None), - click.option('--min-score-ratio', type=float, default=None), - click.option('--max-score-ratio', type=float, default=None), + click.option("--make-hard-links", is_flag=True, default=None), + click.option("--max-wait-time", type=int, default=None), + click.option("--no-dupes", is_flag=True, default=None), + click.option("--search-existing", is_flag=True, default=None), + 
click.option("--skip", default=None, multiple=True), + click.option("--skip-domain", default=None, multiple=True), + click.option("--skip-subreddit", default=None, multiple=True), + click.option("--min-score", type=int, default=None), + click.option("--max-score", type=int, default=None), + click.option("--min-score-ratio", type=float, default=None), + click.option("--max-score-ratio", type=float, default=None), ] _archiver_options = [ - click.option('--all-comments', is_flag=True, default=None), - click.option('--comment-context', is_flag=True, default=None), - click.option('-f', '--format', type=click.Choice(('xml', 'json', 'yaml')), default=None), + click.option("--all-comments", is_flag=True, default=None), + click.option("--comment-context", is_flag=True, default=None), + click.option("-f", "--format", type=click.Choice(("xml", "json", "yaml")), default=None), ] @@ -68,6 +69,7 @@ def _add_options(opts: list): for opt in opts: func = opt(func) return func + return wrap @@ -76,7 +78,7 @@ def cli(): pass -@cli.command('download') +@cli.command("download") @_add_options(_common_options) @_add_options(_downloader_options) @click.pass_context @@ -88,13 +90,13 @@ def cli_download(context: click.Context, **_): reddit_downloader = RedditDownloader(config) reddit_downloader.download() except Exception: - logger.exception('Downloader exited unexpectedly') + logger.exception("Downloader exited unexpectedly") raise else: - logger.info('Program complete') + logger.info("Program complete") -@cli.command('archive') +@cli.command("archive") @_add_options(_common_options) @_add_options(_archiver_options) @click.pass_context @@ -106,13 +108,13 @@ def cli_archive(context: click.Context, **_): reddit_archiver = Archiver(config) reddit_archiver.download() except Exception: - logger.exception('Archiver exited unexpectedly') + logger.exception("Archiver exited unexpectedly") raise else: - logger.info('Program complete') + logger.info("Program complete") -@cli.command('clone') +@cli.command("clone") @_add_options(_common_options) @_add_options(_archiver_options) @_add_options(_downloader_options) @@ -125,10 +127,10 @@ def cli_clone(context: click.Context, **_): reddit_scraper = RedditCloner(config) reddit_scraper.download() except Exception: - logger.exception('Scraper exited unexpectedly') + logger.exception("Scraper exited unexpectedly") raise else: - logger.info('Program complete') + logger.info("Program complete") def setup_logging(verbosity: int): @@ -141,7 +143,7 @@ def setup_logging(verbosity: int): stream = logging.StreamHandler(sys.stdout) stream.addFilter(StreamExceptionFilter()) - formatter = logging.Formatter('[%(asctime)s - %(name)s - %(levelname)s] - %(message)s') + formatter = logging.Formatter("[%(asctime)s - %(name)s - %(levelname)s] - %(message)s") stream.setFormatter(formatter) logger.addHandler(stream) @@ -151,10 +153,10 @@ def setup_logging(verbosity: int): stream.setLevel(logging.DEBUG) else: stream.setLevel(9) - logging.getLogger('praw').setLevel(logging.CRITICAL) - logging.getLogger('prawcore').setLevel(logging.CRITICAL) - logging.getLogger('urllib3').setLevel(logging.CRITICAL) + logging.getLogger("praw").setLevel(logging.CRITICAL) + logging.getLogger("prawcore").setLevel(logging.CRITICAL) + logging.getLogger("urllib3").setLevel(logging.CRITICAL) -if __name__ == '__main__': +if __name__ == "__main__": cli() diff --git a/bdfr/archive_entry/base_archive_entry.py b/bdfr/archive_entry/base_archive_entry.py index 57e36f8..49ea58a 100644 --- a/bdfr/archive_entry/base_archive_entry.py +++ 
b/bdfr/archive_entry/base_archive_entry.py @@ -19,21 +19,21 @@ class BaseArchiveEntry(ABC): @staticmethod def _convert_comment_to_dict(in_comment: Comment) -> dict: out_dict = { - 'author': in_comment.author.name if in_comment.author else 'DELETED', - 'id': in_comment.id, - 'score': in_comment.score, - 'subreddit': in_comment.subreddit.display_name, - 'author_flair': in_comment.author_flair_text, - 'submission': in_comment.submission.id, - 'stickied': in_comment.stickied, - 'body': in_comment.body, - 'is_submitter': in_comment.is_submitter, - 'distinguished': in_comment.distinguished, - 'created_utc': in_comment.created_utc, - 'parent_id': in_comment.parent_id, - 'replies': [], + "author": in_comment.author.name if in_comment.author else "DELETED", + "id": in_comment.id, + "score": in_comment.score, + "subreddit": in_comment.subreddit.display_name, + "author_flair": in_comment.author_flair_text, + "submission": in_comment.submission.id, + "stickied": in_comment.stickied, + "body": in_comment.body, + "is_submitter": in_comment.is_submitter, + "distinguished": in_comment.distinguished, + "created_utc": in_comment.created_utc, + "parent_id": in_comment.parent_id, + "replies": [], } in_comment.replies.replace_more(limit=None) for reply in in_comment.replies: - out_dict['replies'].append(BaseArchiveEntry._convert_comment_to_dict(reply)) + out_dict["replies"].append(BaseArchiveEntry._convert_comment_to_dict(reply)) return out_dict diff --git a/bdfr/archive_entry/comment_archive_entry.py b/bdfr/archive_entry/comment_archive_entry.py index 1bb5c18..1c72811 100644 --- a/bdfr/archive_entry/comment_archive_entry.py +++ b/bdfr/archive_entry/comment_archive_entry.py @@ -17,5 +17,5 @@ class CommentArchiveEntry(BaseArchiveEntry): def compile(self) -> dict: self.source.refresh() self.post_details = self._convert_comment_to_dict(self.source) - self.post_details['submission_title'] = self.source.submission.title + self.post_details["submission_title"] = self.source.submission.title return self.post_details diff --git a/bdfr/archive_entry/submission_archive_entry.py b/bdfr/archive_entry/submission_archive_entry.py index c124e0f..92f326e 100644 --- a/bdfr/archive_entry/submission_archive_entry.py +++ b/bdfr/archive_entry/submission_archive_entry.py @@ -18,32 +18,32 @@ class SubmissionArchiveEntry(BaseArchiveEntry): comments = self._get_comments() self._get_post_details() out = self.post_details - out['comments'] = comments + out["comments"] = comments return out def _get_post_details(self): self.post_details = { - 'title': self.source.title, - 'name': self.source.name, - 'url': self.source.url, - 'selftext': self.source.selftext, - 'score': self.source.score, - 'upvote_ratio': self.source.upvote_ratio, - 'permalink': self.source.permalink, - 'id': self.source.id, - 'author': self.source.author.name if self.source.author else 'DELETED', - 'link_flair_text': self.source.link_flair_text, - 'num_comments': self.source.num_comments, - 'over_18': self.source.over_18, - 'spoiler': self.source.spoiler, - 'pinned': self.source.pinned, - 'locked': self.source.locked, - 'distinguished': self.source.distinguished, - 'created_utc': self.source.created_utc, + "title": self.source.title, + "name": self.source.name, + "url": self.source.url, + "selftext": self.source.selftext, + "score": self.source.score, + "upvote_ratio": self.source.upvote_ratio, + "permalink": self.source.permalink, + "id": self.source.id, + "author": self.source.author.name if self.source.author else "DELETED", + "link_flair_text": 
self.source.link_flair_text, + "num_comments": self.source.num_comments, + "over_18": self.source.over_18, + "spoiler": self.source.spoiler, + "pinned": self.source.pinned, + "locked": self.source.locked, + "distinguished": self.source.distinguished, + "created_utc": self.source.created_utc, } def _get_comments(self) -> list[dict]: - logger.debug(f'Retrieving full comment tree for submission {self.source.id}') + logger.debug(f"Retrieving full comment tree for submission {self.source.id}") comments = [] self.source.comments.replace_more(limit=None) for top_level_comment in self.source.comments: diff --git a/bdfr/archiver.py b/bdfr/archiver.py index 809af96..3d0d31b 100644 --- a/bdfr/archiver.py +++ b/bdfr/archiver.py @@ -30,26 +30,28 @@ class Archiver(RedditConnector): for generator in self.reddit_lists: for submission in generator: try: - if (submission.author and submission.author.name in self.args.ignore_user) or \ - (submission.author is None and 'DELETED' in self.args.ignore_user): + if (submission.author and submission.author.name in self.args.ignore_user) or ( + submission.author is None and "DELETED" in self.args.ignore_user + ): logger.debug( - f'Submission {submission.id} in {submission.subreddit.display_name} skipped' - f' due to {submission.author.name if submission.author else "DELETED"} being an ignored user') + f"Submission {submission.id} in {submission.subreddit.display_name} skipped" + f' due to {submission.author.name if submission.author else "DELETED"} being an ignored user' + ) continue if submission.id in self.excluded_submission_ids: - logger.debug(f'Object {submission.id} in exclusion list, skipping') + logger.debug(f"Object {submission.id} in exclusion list, skipping") continue - logger.debug(f'Attempting to archive submission {submission.id}') + logger.debug(f"Attempting to archive submission {submission.id}") self.write_entry(submission) except prawcore.PrawcoreException as e: - logger.error(f'Submission {submission.id} failed to be archived due to a PRAW exception: {e}') + logger.error(f"Submission {submission.id} failed to be archived due to a PRAW exception: {e}") def get_submissions_from_link(self) -> list[list[praw.models.Submission]]: supplied_submissions = [] for sub_id in self.args.link: if len(sub_id) == 6: supplied_submissions.append(self.reddit_instance.submission(id=sub_id)) - elif re.match(r'^\w{7}$', sub_id): + elif re.match(r"^\w{7}$", sub_id): supplied_submissions.append(self.reddit_instance.comment(id=sub_id)) else: supplied_submissions.append(self.reddit_instance.submission(url=sub_id)) @@ -60,7 +62,7 @@ class Archiver(RedditConnector): if self.args.user and self.args.all_comments: sort = self.determine_sort_function() for user in self.args.user: - logger.debug(f'Retrieving comments of user {user}') + logger.debug(f"Retrieving comments of user {user}") results.append(sort(self.reddit_instance.redditor(user).comments, limit=self.args.limit)) return results @@ -71,43 +73,44 @@ class Archiver(RedditConnector): elif isinstance(praw_item, praw.models.Comment): return CommentArchiveEntry(praw_item) else: - raise ArchiverError(f'Factory failed to classify item of type {type(praw_item).__name__}') + raise ArchiverError(f"Factory failed to classify item of type {type(praw_item).__name__}") def write_entry(self, praw_item: Union[praw.models.Submission, praw.models.Comment]): if self.args.comment_context and isinstance(praw_item, praw.models.Comment): - logger.debug(f'Converting comment {praw_item.id} to submission {praw_item.submission.id}') + 
logger.debug(f"Converting comment {praw_item.id} to submission {praw_item.submission.id}") praw_item = praw_item.submission archive_entry = self._pull_lever_entry_factory(praw_item) - if self.args.format == 'json': + if self.args.format == "json": self._write_entry_json(archive_entry) - elif self.args.format == 'xml': + elif self.args.format == "xml": self._write_entry_xml(archive_entry) - elif self.args.format == 'yaml': + elif self.args.format == "yaml": self._write_entry_yaml(archive_entry) else: - raise ArchiverError(f'Unknown format {self.args.format} given') - logger.info(f'Record for entry item {praw_item.id} written to disk') + raise ArchiverError(f"Unknown format {self.args.format} given") + logger.info(f"Record for entry item {praw_item.id} written to disk") def _write_entry_json(self, entry: BaseArchiveEntry): - resource = Resource(entry.source, '', lambda: None, '.json') + resource = Resource(entry.source, "", lambda: None, ".json") content = json.dumps(entry.compile()) self._write_content_to_disk(resource, content) def _write_entry_xml(self, entry: BaseArchiveEntry): - resource = Resource(entry.source, '', lambda: None, '.xml') - content = dict2xml.dict2xml(entry.compile(), wrap='root') + resource = Resource(entry.source, "", lambda: None, ".xml") + content = dict2xml.dict2xml(entry.compile(), wrap="root") self._write_content_to_disk(resource, content) def _write_entry_yaml(self, entry: BaseArchiveEntry): - resource = Resource(entry.source, '', lambda: None, '.yaml') + resource = Resource(entry.source, "", lambda: None, ".yaml") content = yaml.dump(entry.compile()) self._write_content_to_disk(resource, content) def _write_content_to_disk(self, resource: Resource, content: str): file_path = self.file_name_formatter.format_path(resource, self.download_directory) file_path.parent.mkdir(exist_ok=True, parents=True) - with open(file_path, 'w', encoding="utf-8") as file: + with open(file_path, "w", encoding="utf-8") as file: logger.debug( - f'Writing entry {resource.source_submission.id} to file in {resource.extension[1:].upper()}' - f' format at {file_path}') + f"Writing entry {resource.source_submission.id} to file in {resource.extension[1:].upper()}" + f" format at {file_path}" + ) file.write(content) diff --git a/bdfr/cloner.py b/bdfr/cloner.py index 47e03f8..c26d17b 100644 --- a/bdfr/cloner.py +++ b/bdfr/cloner.py @@ -23,4 +23,4 @@ class RedditCloner(RedditDownloader, Archiver): self._download_submission(submission) self.write_entry(submission) except prawcore.PrawcoreException as e: - logger.error(f'Submission {submission.id} failed to be cloned due to a PRAW exception: {e}') + logger.error(f"Submission {submission.id} failed to be cloned due to a PRAW exception: {e}") diff --git a/bdfr/configuration.py b/bdfr/configuration.py index c15e429..a2a5310 100644 --- a/bdfr/configuration.py +++ b/bdfr/configuration.py @@ -1,28 +1,29 @@ #!/usr/bin/env python3 # coding=utf-8 +import logging from argparse import Namespace from pathlib import Path from typing import Optional -import logging import click import yaml logger = logging.getLogger(__name__) + class Configuration(Namespace): def __init__(self): super(Configuration, self).__init__() self.authenticate = False self.config = None self.opts: Optional[str] = None - self.directory: str = '.' + self.directory: str = "." 
self.disable_module: list[str] = [] self.exclude_id = [] self.exclude_id_file = [] - self.file_scheme: str = '{REDDITOR}_{TITLE}_{POSTID}' - self.folder_scheme: str = '{SUBREDDIT}' + self.file_scheme: str = "{REDDITOR}_{TITLE}_{POSTID}" + self.folder_scheme: str = "{SUBREDDIT}" self.ignore_user = [] self.include_id_file = [] self.limit: Optional[int] = None @@ -42,11 +43,11 @@ class Configuration(Namespace): self.max_score = None self.min_score_ratio = None self.max_score_ratio = None - self.sort: str = 'hot' + self.sort: str = "hot" self.submitted: bool = False self.subscribed: bool = False self.subreddit: list[str] = [] - self.time: str = 'all' + self.time: str = "all" self.time_format = None self.upvoted: bool = False self.user: list[str] = [] @@ -54,15 +55,15 @@ class Configuration(Namespace): # Archiver-specific options self.all_comments = False - self.format = 'json' + self.format = "json" self.comment_context: bool = False def process_click_arguments(self, context: click.Context): - if context.params.get('opts') is not None: - self.parse_yaml_options(context.params['opts']) + if context.params.get("opts") is not None: + self.parse_yaml_options(context.params["opts"]) for arg_key in context.params.keys(): if not hasattr(self, arg_key): - logger.warning(f'Ignoring an unknown CLI argument: {arg_key}') + logger.warning(f"Ignoring an unknown CLI argument: {arg_key}") continue val = context.params[arg_key] if val is None or val == (): @@ -73,16 +74,16 @@ class Configuration(Namespace): def parse_yaml_options(self, file_path: str): yaml_file_loc = Path(file_path) if not yaml_file_loc.exists(): - logger.error(f'No YAML file found at {yaml_file_loc}') + logger.error(f"No YAML file found at {yaml_file_loc}") return with yaml_file_loc.open() as file: try: opts = yaml.load(file, Loader=yaml.FullLoader) except yaml.YAMLError as e: - logger.error(f'Could not parse YAML options file: {e}') + logger.error(f"Could not parse YAML options file: {e}") return for arg_key, val in opts.items(): if not hasattr(self, arg_key): - logger.warning(f'Ignoring an unknown YAML argument: {arg_key}') + logger.warning(f"Ignoring an unknown YAML argument: {arg_key}") continue setattr(self, arg_key, val) diff --git a/bdfr/connector.py b/bdfr/connector.py index 3b359e8..ea970db 100644 --- a/bdfr/connector.py +++ b/bdfr/connector.py @@ -41,18 +41,18 @@ class RedditTypes: TOP = auto() class TimeType(Enum): - ALL = 'all' - DAY = 'day' - HOUR = 'hour' - MONTH = 'month' - WEEK = 'week' - YEAR = 'year' + ALL = "all" + DAY = "day" + HOUR = "hour" + MONTH = "month" + WEEK = "week" + YEAR = "year" class RedditConnector(metaclass=ABCMeta): def __init__(self, args: Configuration): self.args = args - self.config_directories = appdirs.AppDirs('bdfr', 'BDFR') + self.config_directories = appdirs.AppDirs("bdfr", "BDFR") self.run_time = datetime.now().isoformat() self._setup_internal_objects() @@ -68,13 +68,13 @@ class RedditConnector(metaclass=ABCMeta): self.parse_disabled_modules() self.download_filter = self.create_download_filter() - logger.log(9, 'Created download filter') + logger.log(9, "Created download filter") self.time_filter = self.create_time_filter() - logger.log(9, 'Created time filter') + logger.log(9, "Created time filter") self.sort_filter = self.create_sort_filter() - logger.log(9, 'Created sort filter') + logger.log(9, "Created sort filter") self.file_name_formatter = self.create_file_name_formatter() - logger.log(9, 'Create file name formatter') + logger.log(9, "Create file name formatter") 
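[Aside: the YAML override logic in parse_yaml_options above reduces to a simple pattern: load the file, then setattr only keys the Configuration already defines, skipping the rest. A condensed sketch of that pattern (yaml.safe_load is used here for brevity; the patch itself calls yaml.load with FullLoader):

    from pathlib import Path
    import yaml

    def apply_yaml_overrides(config, file_path: str):
        opts = yaml.safe_load(Path(file_path).read_text())
        for key, val in (opts or {}).items():
            if not hasattr(config, key):
                continue  # unknown keys are logged and skipped, never fatal
            setattr(config, key, val)

Because unknown keys are ignored rather than raised on, an options file can be shared across BDFR versions that support different flags.]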
self.create_reddit_instance() self.args.user = list(filter(None, [self.resolve_user_name(user) for user in self.args.user])) @@ -88,7 +88,7 @@ class RedditConnector(metaclass=ABCMeta): self.master_hash_list = {} self.authenticator = self.create_authenticator() - logger.log(9, 'Created site authenticator') + logger.log(9, "Created site authenticator") self.args.skip_subreddit = self.split_args_input(self.args.skip_subreddit) self.args.skip_subreddit = {sub.lower() for sub in self.args.skip_subreddit} @@ -96,18 +96,18 @@ class RedditConnector(metaclass=ABCMeta): def read_config(self): """Read any cfg values that need to be processed""" if self.args.max_wait_time is None: - self.args.max_wait_time = self.cfg_parser.getint('DEFAULT', 'max_wait_time', fallback=120) - logger.debug(f'Setting maximum download wait time to {self.args.max_wait_time} seconds') + self.args.max_wait_time = self.cfg_parser.getint("DEFAULT", "max_wait_time", fallback=120) + logger.debug(f"Setting maximum download wait time to {self.args.max_wait_time} seconds") if self.args.time_format is None: - option = self.cfg_parser.get('DEFAULT', 'time_format', fallback='ISO') - if re.match(r'^[\s\'\"]*$', option): - option = 'ISO' - logger.debug(f'Setting datetime format string to {option}') + option = self.cfg_parser.get("DEFAULT", "time_format", fallback="ISO") + if re.match(r"^[\s\'\"]*$", option): + option = "ISO" + logger.debug(f"Setting datetime format string to {option}") self.args.time_format = option if not self.args.disable_module: - self.args.disable_module = [self.cfg_parser.get('DEFAULT', 'disabled_modules', fallback='')] + self.args.disable_module = [self.cfg_parser.get("DEFAULT", "disabled_modules", fallback="")] # Update config on disk - with open(self.config_location, 'w') as file: + with open(self.config_location, "w") as file: self.cfg_parser.write(file) def parse_disabled_modules(self): @@ -119,48 +119,48 @@ class RedditConnector(metaclass=ABCMeta): def create_reddit_instance(self): if self.args.authenticate: - logger.debug('Using authenticated Reddit instance') - if not self.cfg_parser.has_option('DEFAULT', 'user_token'): - logger.log(9, 'Commencing OAuth2 authentication') - scopes = self.cfg_parser.get('DEFAULT', 'scopes', fallback='identity, history, read, save') + logger.debug("Using authenticated Reddit instance") + if not self.cfg_parser.has_option("DEFAULT", "user_token"): + logger.log(9, "Commencing OAuth2 authentication") + scopes = self.cfg_parser.get("DEFAULT", "scopes", fallback="identity, history, read, save") scopes = OAuth2Authenticator.split_scopes(scopes) oauth2_authenticator = OAuth2Authenticator( scopes, - self.cfg_parser.get('DEFAULT', 'client_id'), - self.cfg_parser.get('DEFAULT', 'client_secret'), + self.cfg_parser.get("DEFAULT", "client_id"), + self.cfg_parser.get("DEFAULT", "client_secret"), ) token = oauth2_authenticator.retrieve_new_token() - self.cfg_parser['DEFAULT']['user_token'] = token - with open(self.config_location, 'w') as file: + self.cfg_parser["DEFAULT"]["user_token"] = token + with open(self.config_location, "w") as file: self.cfg_parser.write(file, True) token_manager = OAuth2TokenManager(self.cfg_parser, self.config_location) self.authenticated = True self.reddit_instance = praw.Reddit( - client_id=self.cfg_parser.get('DEFAULT', 'client_id'), - client_secret=self.cfg_parser.get('DEFAULT', 'client_secret'), + client_id=self.cfg_parser.get("DEFAULT", "client_id"), + client_secret=self.cfg_parser.get("DEFAULT", "client_secret"), user_agent=socket.gethostname(), 
token_manager=token_manager, ) else: - logger.debug('Using unauthenticated Reddit instance') + logger.debug("Using unauthenticated Reddit instance") self.authenticated = False self.reddit_instance = praw.Reddit( - client_id=self.cfg_parser.get('DEFAULT', 'client_id'), - client_secret=self.cfg_parser.get('DEFAULT', 'client_secret'), + client_id=self.cfg_parser.get("DEFAULT", "client_id"), + client_secret=self.cfg_parser.get("DEFAULT", "client_secret"), user_agent=socket.gethostname(), ) def retrieve_reddit_lists(self) -> list[praw.models.ListingGenerator]: master_list = [] master_list.extend(self.get_subreddits()) - logger.log(9, 'Retrieved subreddits') + logger.log(9, "Retrieved subreddits") master_list.extend(self.get_multireddits()) - logger.log(9, 'Retrieved multireddits') + logger.log(9, "Retrieved multireddits") master_list.extend(self.get_user_data()) - logger.log(9, 'Retrieved user data') + logger.log(9, "Retrieved user data") master_list.extend(self.get_submissions_from_link()) - logger.log(9, 'Retrieved submissions for given links') + logger.log(9, "Retrieved submissions for given links") return master_list def determine_directories(self): @@ -178,37 +178,37 @@ class RedditConnector(metaclass=ABCMeta): self.config_location = cfg_path return possible_paths = [ - Path('./config.cfg'), - Path('./default_config.cfg'), - Path(self.config_directory, 'config.cfg'), - Path(self.config_directory, 'default_config.cfg'), + Path("./config.cfg"), + Path("./default_config.cfg"), + Path(self.config_directory, "config.cfg"), + Path(self.config_directory, "default_config.cfg"), ] self.config_location = None for path in possible_paths: if path.resolve().expanduser().exists(): self.config_location = path - logger.debug(f'Loading configuration from {path}') + logger.debug(f"Loading configuration from {path}") break if not self.config_location: - with importlib.resources.path('bdfr', 'default_config.cfg') as path: + with importlib.resources.path("bdfr", "default_config.cfg") as path: self.config_location = path - shutil.copy(self.config_location, Path(self.config_directory, 'default_config.cfg')) + shutil.copy(self.config_location, Path(self.config_directory, "default_config.cfg")) if not self.config_location: - raise errors.BulkDownloaderException('Could not find a configuration file to load') + raise errors.BulkDownloaderException("Could not find a configuration file to load") self.cfg_parser.read(self.config_location) def create_file_logger(self): main_logger = logging.getLogger() if self.args.log is None: - log_path = Path(self.config_directory, 'log_output.txt') + log_path = Path(self.config_directory, "log_output.txt") else: log_path = Path(self.args.log).resolve().expanduser() if not log_path.parent.exists(): - raise errors.BulkDownloaderException(f'Designated location for logfile does not exist') - backup_count = self.cfg_parser.getint('DEFAULT', 'backup_log_count', fallback=3) + raise errors.BulkDownloaderException(f"Designated location for logfile does not exist") + backup_count = self.cfg_parser.getint("DEFAULT", "backup_log_count", fallback=3) file_handler = logging.handlers.RotatingFileHandler( log_path, - mode='a', + mode="a", backupCount=backup_count, ) if log_path.exists(): @@ -216,10 +216,11 @@ class RedditConnector(metaclass=ABCMeta): file_handler.doRollover() except PermissionError: logger.critical( - 'Cannot rollover logfile, make sure this is the only ' - 'BDFR process or specify alternate logfile location') + "Cannot rollover logfile, make sure this is the only " + "BDFR process 
or specify alternate logfile location" + ) raise - formatter = logging.Formatter('[%(asctime)s - %(name)s - %(levelname)s] - %(message)s') + formatter = logging.Formatter("[%(asctime)s - %(name)s - %(levelname)s] - %(message)s") file_handler.setFormatter(formatter) file_handler.setLevel(0) @@ -227,16 +228,16 @@ class RedditConnector(metaclass=ABCMeta): @staticmethod def sanitise_subreddit_name(subreddit: str) -> str: - pattern = re.compile(r'^(?:https://www\.reddit\.com/)?(?:r/)?(.*?)/?$') + pattern = re.compile(r"^(?:https://www\.reddit\.com/)?(?:r/)?(.*?)/?$") match = re.match(pattern, subreddit) if not match: - raise errors.BulkDownloaderException(f'Could not find subreddit name in string {subreddit}') + raise errors.BulkDownloaderException(f"Could not find subreddit name in string {subreddit}") return match.group(1) @staticmethod def split_args_input(entries: list[str]) -> set[str]: all_entries = [] - split_pattern = re.compile(r'[,;]\s?') + split_pattern = re.compile(r"[,;]\s?") for entry in entries: results = re.split(split_pattern, entry) all_entries.extend([RedditConnector.sanitise_subreddit_name(name) for name in results]) @@ -251,13 +252,13 @@ class RedditConnector(metaclass=ABCMeta): subscribed_subreddits = list(self.reddit_instance.user.subreddits(limit=None)) subscribed_subreddits = {s.display_name for s in subscribed_subreddits} except prawcore.InsufficientScope: - logger.error('BDFR has insufficient scope to access subreddit lists') + logger.error("BDFR has insufficient scope to access subreddit lists") else: - logger.error('Cannot find subscribed subreddits without an authenticated instance') + logger.error("Cannot find subscribed subreddits without an authenticated instance") if self.args.subreddit or subscribed_subreddits: for reddit in self.split_args_input(self.args.subreddit) | subscribed_subreddits: - if reddit == 'friends' and self.authenticated is False: - logger.error('Cannot read friends subreddit without an authenticated instance') + if reddit == "friends" and self.authenticated is False: + logger.error("Cannot read friends subreddit without an authenticated instance") continue try: reddit = self.reddit_instance.subreddit(reddit) @@ -267,26 +268,29 @@ class RedditConnector(metaclass=ABCMeta): logger.error(e) continue if self.args.search: - out.append(reddit.search( - self.args.search, - sort=self.sort_filter.name.lower(), - limit=self.args.limit, - time_filter=self.time_filter.value, - )) + out.append( + reddit.search( + self.args.search, + sort=self.sort_filter.name.lower(), + limit=self.args.limit, + time_filter=self.time_filter.value, + ) + ) logger.debug( - f'Added submissions from subreddit {reddit} with the search term "{self.args.search}"') + f'Added submissions from subreddit {reddit} with the search term "{self.args.search}"' + ) else: out.append(self.create_filtered_listing_generator(reddit)) - logger.debug(f'Added submissions from subreddit {reddit}') + logger.debug(f"Added submissions from subreddit {reddit}") except (errors.BulkDownloaderException, praw.exceptions.PRAWException) as e: - logger.error(f'Failed to get submissions for subreddit {reddit}: {e}') + logger.error(f"Failed to get submissions for subreddit {reddit}: {e}") return out def resolve_user_name(self, in_name: str) -> str: - if in_name == 'me': + if in_name == "me": if self.authenticated: resolved_name = self.reddit_instance.user.me().name - logger.log(9, f'Resolved user to {resolved_name}') + logger.log(9, f"Resolved user to {resolved_name}") return resolved_name else: 
logger.warning('To use "me" as a user, an authenticated Reddit instance must be used') @@ -318,7 +322,7 @@ class RedditConnector(metaclass=ABCMeta): def get_multireddits(self) -> list[Iterator]: if self.args.multireddit: if len(self.args.user) != 1: - logger.error(f'Only 1 user can be supplied when retrieving from multireddits') + logger.error(f"Only 1 user can be supplied when retrieving from multireddits") return [] out = [] for multi in self.split_args_input(self.args.multireddit): @@ -327,9 +331,9 @@ class RedditConnector(metaclass=ABCMeta): if not multi.subreddits: raise errors.BulkDownloaderException out.append(self.create_filtered_listing_generator(multi)) - logger.debug(f'Added submissions from multireddit {multi}') + logger.debug(f"Added submissions from multireddit {multi}") except (errors.BulkDownloaderException, praw.exceptions.PRAWException, prawcore.PrawcoreException) as e: - logger.error(f'Failed to get submissions for multireddit {multi}: {e}') + logger.error(f"Failed to get submissions for multireddit {multi}: {e}") return out else: return [] @@ -344,7 +348,7 @@ class RedditConnector(metaclass=ABCMeta): def get_user_data(self) -> list[Iterator]: if any([self.args.submitted, self.args.upvoted, self.args.saved]): if not self.args.user: - logger.warning('At least one user must be supplied to download user data') + logger.warning("At least one user must be supplied to download user data") return [] generators = [] for user in self.args.user: @@ -354,18 +358,20 @@ class RedditConnector(metaclass=ABCMeta): logger.error(e) continue if self.args.submitted: - logger.debug(f'Retrieving submitted posts of user {self.args.user}') - generators.append(self.create_filtered_listing_generator( - self.reddit_instance.redditor(user).submissions, - )) + logger.debug(f"Retrieving submitted posts of user {self.args.user}") + generators.append( + self.create_filtered_listing_generator( + self.reddit_instance.redditor(user).submissions, + ) + ) if not self.authenticated and any((self.args.upvoted, self.args.saved)): - logger.warning('Accessing user lists requires authentication') + logger.warning("Accessing user lists requires authentication") else: if self.args.upvoted: - logger.debug(f'Retrieving upvoted posts of user {self.args.user}') + logger.debug(f"Retrieving upvoted posts of user {self.args.user}") generators.append(self.reddit_instance.redditor(user).upvoted(limit=self.args.limit)) if self.args.saved: - logger.debug(f'Retrieving saved posts of user {self.args.user}') + logger.debug(f"Retrieving saved posts of user {self.args.user}") generators.append(self.reddit_instance.redditor(user).saved(limit=self.args.limit)) return generators else: @@ -377,10 +383,10 @@ class RedditConnector(metaclass=ABCMeta): if user.id: return except prawcore.exceptions.NotFound: - raise errors.BulkDownloaderException(f'Could not find user {name}') + raise errors.BulkDownloaderException(f"Could not find user {name}") except AttributeError: - if hasattr(user, 'is_suspended'): - raise errors.BulkDownloaderException(f'User {name} is banned') + if hasattr(user, "is_suspended"): + raise errors.BulkDownloaderException(f"User {name} is banned") def create_file_name_formatter(self) -> FileNameFormatter: return FileNameFormatter(self.args.file_scheme, self.args.folder_scheme, self.args.time_format) @@ -409,7 +415,7 @@ class RedditConnector(metaclass=ABCMeta): @staticmethod def check_subreddit_status(subreddit: praw.models.Subreddit): - if subreddit.display_name in ('all', 'friends'): + if subreddit.display_name in 
("all", "friends"): return try: assert subreddit.id @@ -418,7 +424,7 @@ class RedditConnector(metaclass=ABCMeta): except prawcore.Redirect: raise errors.BulkDownloaderException(f"Source {subreddit.display_name} does not exist") except prawcore.Forbidden: - raise errors.BulkDownloaderException(f'Source {subreddit.display_name} is private and cannot be scraped') + raise errors.BulkDownloaderException(f"Source {subreddit.display_name} is private and cannot be scraped") @staticmethod def read_id_files(file_locations: list[str]) -> set[str]: @@ -426,9 +432,9 @@ class RedditConnector(metaclass=ABCMeta): for id_file in file_locations: id_file = Path(id_file).resolve().expanduser() if not id_file.exists(): - logger.warning(f'ID file at {id_file} does not exist') + logger.warning(f"ID file at {id_file} does not exist") continue - with id_file.open('r') as file: + with id_file.open("r") as file: for line in file: out.append(line.strip()) return set(out) diff --git a/bdfr/download_filter.py b/bdfr/download_filter.py index 28053be..9019cc9 100644 --- a/bdfr/download_filter.py +++ b/bdfr/download_filter.py @@ -33,8 +33,8 @@ class DownloadFilter: def _check_extension(self, resource_extension: str) -> bool: if not self.excluded_extensions: return True - combined_extensions = '|'.join(self.excluded_extensions) - pattern = re.compile(r'.*({})$'.format(combined_extensions)) + combined_extensions = "|".join(self.excluded_extensions) + pattern = re.compile(r".*({})$".format(combined_extensions)) if re.match(pattern, resource_extension): logger.log(9, f'Url "{resource_extension}" matched with "{pattern}"') return False @@ -44,8 +44,8 @@ class DownloadFilter: def _check_domain(self, url: str) -> bool: if not self.excluded_domains: return True - combined_domains = '|'.join(self.excluded_domains) - pattern = re.compile(r'https?://.*({}).*'.format(combined_domains)) + combined_domains = "|".join(self.excluded_domains) + pattern = re.compile(r"https?://.*({}).*".format(combined_domains)) if re.match(pattern, url): logger.log(9, f'Url "{url}" matched with "{pattern}"') return False diff --git a/bdfr/downloader.py b/bdfr/downloader.py index 6f26937..fa5d10c 100644 --- a/bdfr/downloader.py +++ b/bdfr/downloader.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) def _calc_hash(existing_file: Path): chunk_size = 1024 * 1024 md5_hash = hashlib.md5() - with existing_file.open('rb') as file: + with existing_file.open("rb") as file: chunk = file.read(chunk_size) while chunk: md5_hash.update(chunk) @@ -46,28 +46,32 @@ class RedditDownloader(RedditConnector): try: self._download_submission(submission) except prawcore.PrawcoreException as e: - logger.error(f'Submission {submission.id} failed to download due to a PRAW exception: {e}') + logger.error(f"Submission {submission.id} failed to download due to a PRAW exception: {e}") def _download_submission(self, submission: praw.models.Submission): if submission.id in self.excluded_submission_ids: - logger.debug(f'Object {submission.id} in exclusion list, skipping') + logger.debug(f"Object {submission.id} in exclusion list, skipping") return elif submission.subreddit.display_name.lower() in self.args.skip_subreddit: - logger.debug(f'Submission {submission.id} in {submission.subreddit.display_name} in skip list') + logger.debug(f"Submission {submission.id} in {submission.subreddit.display_name} in skip list") return - elif (submission.author and submission.author.name in self.args.ignore_user) or \ - (submission.author is None and 'DELETED' in self.args.ignore_user): + elif 
(submission.author and submission.author.name in self.args.ignore_user) or ( + submission.author is None and "DELETED" in self.args.ignore_user + ): logger.debug( - f'Submission {submission.id} in {submission.subreddit.display_name} skipped' - f' due to {submission.author.name if submission.author else "DELETED"} being an ignored user') + f"Submission {submission.id} in {submission.subreddit.display_name} skipped" + f' due to {submission.author.name if submission.author else "DELETED"} being an ignored user' + ) return elif self.args.min_score and submission.score < self.args.min_score: logger.debug( - f"Submission {submission.id} filtered due to score {submission.score} < [{self.args.min_score}]") + f"Submission {submission.id} filtered due to score {submission.score} < [{self.args.min_score}]" + ) return elif self.args.max_score and self.args.max_score < submission.score: logger.debug( - f"Submission {submission.id} filtered due to score {submission.score} > [{self.args.max_score}]") + f"Submission {submission.id} filtered due to score {submission.score} > [{self.args.max_score}]" + ) return elif (self.args.min_score_ratio and submission.upvote_ratio < self.args.min_score_ratio) or ( self.args.max_score_ratio and self.args.max_score_ratio < submission.upvote_ratio @@ -75,47 +79,48 @@ class RedditDownloader(RedditConnector): logger.debug(f"Submission {submission.id} filtered due to score ratio ({submission.upvote_ratio})") return elif not isinstance(submission, praw.models.Submission): - logger.warning(f'{submission.id} is not a submission') + logger.warning(f"{submission.id} is not a submission") return elif not self.download_filter.check_url(submission.url): - logger.debug(f'Submission {submission.id} filtered due to URL {submission.url}') + logger.debug(f"Submission {submission.id} filtered due to URL {submission.url}") return - logger.debug(f'Attempting to download submission {submission.id}') + logger.debug(f"Attempting to download submission {submission.id}") try: downloader_class = DownloadFactory.pull_lever(submission.url) downloader = downloader_class(submission) - logger.debug(f'Using {downloader_class.__name__} with url {submission.url}') + logger.debug(f"Using {downloader_class.__name__} with url {submission.url}") except errors.NotADownloadableLinkError as e: - logger.error(f'Could not download submission {submission.id}: {e}') + logger.error(f"Could not download submission {submission.id}: {e}") return if downloader_class.__name__.lower() in self.args.disable_module: - logger.debug(f'Submission {submission.id} skipped due to disabled module {downloader_class.__name__}') + logger.debug(f"Submission {submission.id} skipped due to disabled module {downloader_class.__name__}") return try: content = downloader.find_resources(self.authenticator) except errors.SiteDownloaderError as e: - logger.error(f'Site {downloader_class.__name__} failed to download submission {submission.id}: {e}') + logger.error(f"Site {downloader_class.__name__} failed to download submission {submission.id}: {e}") return for destination, res in self.file_name_formatter.format_resource_paths(content, self.download_directory): if destination.exists(): - logger.debug(f'File {destination} from submission {submission.id} already exists, continuing') + logger.debug(f"File {destination} from submission {submission.id} already exists, continuing") continue elif not self.download_filter.check_resource(res): - logger.debug(f'Download filter removed {submission.id} file with URL {submission.url}') + 
logger.debug(f"Download filter removed {submission.id} file with URL {submission.url}") continue try: - res.download({'max_wait_time': self.args.max_wait_time}) + res.download({"max_wait_time": self.args.max_wait_time}) except errors.BulkDownloaderException as e: - logger.error(f'Failed to download resource {res.url} in submission {submission.id} ' - f'with downloader {downloader_class.__name__}: {e}') + logger.error( + f"Failed to download resource {res.url} in submission {submission.id} " + f"with downloader {downloader_class.__name__}: {e}" + ) return resource_hash = res.hash.hexdigest() destination.parent.mkdir(parents=True, exist_ok=True) if resource_hash in self.master_hash_list: if self.args.no_dupes: - logger.info( - f'Resource hash {resource_hash} from submission {submission.id} downloaded elsewhere') + logger.info(f"Resource hash {resource_hash} from submission {submission.id} downloaded elsewhere") return elif self.args.make_hard_links: try: @@ -123,29 +128,30 @@ class RedditDownloader(RedditConnector): except AttributeError: self.master_hash_list[resource_hash].link_to(destination) logger.info( - f'Hard link made linking {destination} to {self.master_hash_list[resource_hash]}' - f' in submission {submission.id}') + f"Hard link made linking {destination} to {self.master_hash_list[resource_hash]}" + f" in submission {submission.id}" + ) return try: - with destination.open('wb') as file: + with destination.open("wb") as file: file.write(res.content) - logger.debug(f'Written file to {destination}') + logger.debug(f"Written file to {destination}") except OSError as e: logger.exception(e) - logger.error(f'Failed to write file in submission {submission.id} to {destination}: {e}') + logger.error(f"Failed to write file in submission {submission.id} to {destination}: {e}") return creation_time = time.mktime(datetime.fromtimestamp(submission.created_utc).timetuple()) os.utime(destination, (creation_time, creation_time)) self.master_hash_list[resource_hash] = destination - logger.debug(f'Hash added to master list: {resource_hash}') - logger.info(f'Downloaded submission {submission.id} from {submission.subreddit.display_name}') + logger.debug(f"Hash added to master list: {resource_hash}") + logger.info(f"Downloaded submission {submission.id} from {submission.subreddit.display_name}") @staticmethod def scan_existing_files(directory: Path) -> dict[str, Path]: files = [] for (dirpath, dirnames, filenames) in os.walk(directory): files.extend([Path(dirpath, file) for file in filenames]) - logger.info(f'Calculating hashes for {len(files)} files') + logger.info(f"Calculating hashes for {len(files)} files") pool = Pool(15) results = pool.map(_calc_hash, files) diff --git a/bdfr/exceptions.py b/bdfr/exceptions.py index 91fda2c..1757cd9 100644 --- a/bdfr/exceptions.py +++ b/bdfr/exceptions.py @@ -1,5 +1,6 @@ #!/usr/bin/env + class BulkDownloaderException(Exception): pass diff --git a/bdfr/file_name_formatter.py b/bdfr/file_name_formatter.py index 4a039c9..684c626 100644 --- a/bdfr/file_name_formatter.py +++ b/bdfr/file_name_formatter.py @@ -18,20 +18,20 @@ logger = logging.getLogger(__name__) class FileNameFormatter: key_terms = ( - 'date', - 'flair', - 'postid', - 'redditor', - 'subreddit', - 'title', - 'upvotes', + "date", + "flair", + "postid", + "redditor", + "subreddit", + "title", + "upvotes", ) def __init__(self, file_format_string: str, directory_format_string: str, time_format_string: str): if not self.validate_string(file_format_string): raise 
BulkDownloaderException(f'"{file_format_string}" is not a valid format string') self.file_format_string = file_format_string - self.directory_format_string: list[str] = directory_format_string.split('/') + self.directory_format_string: list[str] = directory_format_string.split("/") self.time_format_string = time_format_string def _format_name(self, submission: Union[Comment, Submission], format_string: str) -> str: @@ -40,108 +40,111 @@ class FileNameFormatter: elif isinstance(submission, Comment): attributes = self._generate_name_dict_from_comment(submission) else: - raise BulkDownloaderException(f'Cannot name object {type(submission).__name__}') + raise BulkDownloaderException(f"Cannot name object {type(submission).__name__}") result = format_string for key in attributes.keys(): - if re.search(fr'(?i).*{{{key}}}.*', result): - key_value = str(attributes.get(key, 'unknown')) + if re.search(rf"(?i).*{{{key}}}.*", result): + key_value = str(attributes.get(key, "unknown")) key_value = FileNameFormatter._convert_unicode_escapes(key_value) - key_value = key_value.replace('\\', '\\\\') - result = re.sub(fr'(?i){{{key}}}', key_value, result) + key_value = key_value.replace("\\", "\\\\") + result = re.sub(rf"(?i){{{key}}}", key_value, result) - result = result.replace('/', '') + result = result.replace("/", "") - if platform.system() == 'Windows': + if platform.system() == "Windows": result = FileNameFormatter._format_for_windows(result) return result @staticmethod def _convert_unicode_escapes(in_string: str) -> str: - pattern = re.compile(r'(\\u\d{4})') + pattern = re.compile(r"(\\u\d{4})") matches = re.search(pattern, in_string) if matches: for match in matches.groups(): - converted_match = bytes(match, 'utf-8').decode('unicode-escape') + converted_match = bytes(match, "utf-8").decode("unicode-escape") in_string = in_string.replace(match, converted_match) return in_string def _generate_name_dict_from_submission(self, submission: Submission) -> dict: submission_attributes = { - 'title': submission.title, - 'subreddit': submission.subreddit.display_name, - 'redditor': submission.author.name if submission.author else 'DELETED', - 'postid': submission.id, - 'upvotes': submission.score, - 'flair': submission.link_flair_text, - 'date': self._convert_timestamp(submission.created_utc), + "title": submission.title, + "subreddit": submission.subreddit.display_name, + "redditor": submission.author.name if submission.author else "DELETED", + "postid": submission.id, + "upvotes": submission.score, + "flair": submission.link_flair_text, + "date": self._convert_timestamp(submission.created_utc), } return submission_attributes def _convert_timestamp(self, timestamp: float) -> str: input_time = datetime.datetime.fromtimestamp(timestamp) - if self.time_format_string.upper().strip() == 'ISO': + if self.time_format_string.upper().strip() == "ISO": return input_time.isoformat() else: return input_time.strftime(self.time_format_string) def _generate_name_dict_from_comment(self, comment: Comment) -> dict: comment_attributes = { - 'title': comment.submission.title, - 'subreddit': comment.subreddit.display_name, - 'redditor': comment.author.name if comment.author else 'DELETED', - 'postid': comment.id, - 'upvotes': comment.score, - 'flair': '', - 'date': self._convert_timestamp(comment.created_utc), + "title": comment.submission.title, + "subreddit": comment.subreddit.display_name, + "redditor": comment.author.name if comment.author else "DELETED", + "postid": comment.id, + "upvotes": comment.score, + "flair": "", + 
"date": self._convert_timestamp(comment.created_utc), } return comment_attributes def format_path( - self, - resource: Resource, - destination_directory: Path, - index: Optional[int] = None, + self, + resource: Resource, + destination_directory: Path, + index: Optional[int] = None, ) -> Path: subfolder = Path( destination_directory, *[self._format_name(resource.source_submission, part) for part in self.directory_format_string], ) - index = f'_{index}' if index else '' + index = f"_{index}" if index else "" if not resource.extension: - raise BulkDownloaderException(f'Resource from {resource.url} has no extension') + raise BulkDownloaderException(f"Resource from {resource.url} has no extension") file_name = str(self._format_name(resource.source_submission, self.file_format_string)) - file_name = re.sub(r'\n', ' ', file_name) + file_name = re.sub(r"\n", " ", file_name) - if not re.match(r'.*\.$', file_name) and not re.match(r'^\..*', resource.extension): - ending = index + '.' + resource.extension + if not re.match(r".*\.$", file_name) and not re.match(r"^\..*", resource.extension): + ending = index + "." + resource.extension else: ending = index + resource.extension try: file_path = self.limit_file_name_length(file_name, ending, subfolder) except TypeError: - raise BulkDownloaderException(f'Could not determine path name: {subfolder}, {index}, {resource.extension}') + raise BulkDownloaderException(f"Could not determine path name: {subfolder}, {index}, {resource.extension}") return file_path @staticmethod def limit_file_name_length(filename: str, ending: str, root: Path) -> Path: root = root.resolve().expanduser() - possible_id = re.search(r'((?:_\w{6})?$)', filename) + possible_id = re.search(r"((?:_\w{6})?$)", filename) if possible_id: ending = possible_id.group(1) + ending - filename = filename[:possible_id.start()] + filename = filename[: possible_id.start()] max_path = FileNameFormatter.find_max_path_length() max_file_part_length_chars = 255 - len(ending) - max_file_part_length_bytes = 255 - len(ending.encode('utf-8')) + max_file_part_length_bytes = 255 - len(ending.encode("utf-8")) max_path_length = max_path - len(ending) - len(str(root)) - 1 out = Path(root, filename + ending) - while any([len(filename) > max_file_part_length_chars, - len(filename.encode('utf-8')) > max_file_part_length_bytes, - len(str(out)) > max_path_length, - ]): + while any( + [ + len(filename) > max_file_part_length_chars, + len(filename.encode("utf-8")) > max_file_part_length_bytes, + len(str(out)) > max_path_length, + ] + ): filename = filename[:-1] out = Path(root, filename + ending) @@ -150,44 +153,46 @@ class FileNameFormatter: @staticmethod def find_max_path_length() -> int: try: - return int(subprocess.check_output(['getconf', 'PATH_MAX', '/'])) + return int(subprocess.check_output(["getconf", "PATH_MAX", "/"])) except (ValueError, subprocess.CalledProcessError, OSError): - if platform.system() == 'Windows': + if platform.system() == "Windows": return 260 else: return 4096 def format_resource_paths( - self, - resources: list[Resource], - destination_directory: Path, + self, + resources: list[Resource], + destination_directory: Path, ) -> list[tuple[Path, Resource]]: out = [] if len(resources) == 1: try: out.append((self.format_path(resources[0], destination_directory, None), resources[0])) except BulkDownloaderException as e: - logger.error(f'Could not generate file path for resource {resources[0].url}: {e}') - logger.exception('Could not generate file path') + logger.error(f"Could not generate file path 
for resource {resources[0].url}: {e}") + logger.exception("Could not generate file path") else: for i, res in enumerate(resources, start=1): - logger.log(9, f'Formatting filename with index {i}') + logger.log(9, f"Formatting filename with index {i}") try: out.append((self.format_path(res, destination_directory, i), res)) except BulkDownloaderException as e: - logger.error(f'Could not generate file path for resource {res.url}: {e}') - logger.exception('Could not generate file path') + logger.error(f"Could not generate file path for resource {res.url}: {e}") + logger.exception("Could not generate file path") return out @staticmethod def validate_string(test_string: str) -> bool: if not test_string: return False - result = any([f'{{{key}}}' in test_string.lower() for key in FileNameFormatter.key_terms]) + result = any([f"{{{key}}}" in test_string.lower() for key in FileNameFormatter.key_terms]) if result: - if 'POSTID' not in test_string: - logger.warning('Some files might not be downloaded due to name conflicts as filenames are' - ' not guaranteed to be be unique without {POSTID}') + if "POSTID" not in test_string: + logger.warning( + "Some files might not be downloaded due to name conflicts as filenames are" + " not guaranteed to be be unique without {POSTID}" + ) return True else: return False @@ -196,11 +201,11 @@ class FileNameFormatter: def _format_for_windows(input_string: str) -> str: invalid_characters = r'<>:"\/|?*' for char in invalid_characters: - input_string = input_string.replace(char, '') + input_string = input_string.replace(char, "") input_string = FileNameFormatter._strip_emojis(input_string) return input_string @staticmethod def _strip_emojis(input_string: str) -> str: - result = input_string.encode('ascii', errors='ignore').decode('utf-8') + result = input_string.encode("ascii", errors="ignore").decode("utf-8") return result diff --git a/bdfr/oauth2.py b/bdfr/oauth2.py index bd60c9b..60f2169 100644 --- a/bdfr/oauth2.py +++ b/bdfr/oauth2.py @@ -17,7 +17,6 @@ logger = logging.getLogger(__name__) class OAuth2Authenticator: - def __init__(self, wanted_scopes: set[str], client_id: str, client_secret: str): self._check_scopes(wanted_scopes) self.scopes = wanted_scopes @@ -26,39 +25,41 @@ class OAuth2Authenticator: @staticmethod def _check_scopes(wanted_scopes: set[str]): - response = requests.get('https://www.reddit.com/api/v1/scopes.json', - headers={'User-Agent': 'fetch-scopes test'}) + response = requests.get( + "https://www.reddit.com/api/v1/scopes.json", headers={"User-Agent": "fetch-scopes test"} + ) known_scopes = [scope for scope, data in response.json().items()] - known_scopes.append('*') + known_scopes.append("*") for scope in wanted_scopes: if scope not in known_scopes: - raise BulkDownloaderException(f'Scope {scope} is not known to reddit') + raise BulkDownloaderException(f"Scope {scope} is not known to reddit") @staticmethod def split_scopes(scopes: str) -> set[str]: - scopes = re.split(r'[,: ]+', scopes) + scopes = re.split(r"[,: ]+", scopes) return set(scopes) def retrieve_new_token(self) -> str: reddit = praw.Reddit( - redirect_uri='http://localhost:7634', - user_agent='obtain_refresh_token for BDFR', + redirect_uri="http://localhost:7634", + user_agent="obtain_refresh_token for BDFR", client_id=self.client_id, - client_secret=self.client_secret) + client_secret=self.client_secret, + ) state = str(random.randint(0, 65000)) - url = reddit.auth.url(self.scopes, state, 'permanent') - logger.warning('Authentication action required before the program can proceed') - 
logger.warning(f'Authenticate at {url}') + url = reddit.auth.url(self.scopes, state, "permanent") + logger.warning("Authentication action required before the program can proceed") + logger.warning(f"Authenticate at {url}") client = self.receive_connection() - data = client.recv(1024).decode('utf-8') - param_tokens = data.split(' ', 2)[1].split('?', 1)[1].split('&') - params = {key: value for (key, value) in [token.split('=') for token in param_tokens]} + data = client.recv(1024).decode("utf-8") + param_tokens = data.split(" ", 2)[1].split("?", 1)[1].split("&") + params = {key: value for (key, value) in [token.split("=") for token in param_tokens]} - if state != params['state']: + if state != params["state"]: self.send_message(client) raise RedditAuthenticationError(f'State mismatch in OAuth2. Expected: {state} Received: {params["state"]}') - elif 'error' in params: + elif "error" in params: self.send_message(client) raise RedditAuthenticationError(f'Error in OAuth2: {params["error"]}') @@ -70,19 +71,19 @@ class OAuth2Authenticator: def receive_connection() -> socket.socket: server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server.bind(('0.0.0.0', 7634)) - logger.log(9, 'Server listening on 0.0.0.0:7634') + server.bind(("0.0.0.0", 7634)) + logger.log(9, "Server listening on 0.0.0.0:7634") server.listen(1) client = server.accept()[0] server.close() - logger.log(9, 'Server closed') + logger.log(9, "Server closed") return client @staticmethod - def send_message(client: socket.socket, message: str = ''): - client.send(f'HTTP/1.1 200 OK\r\n\r\n{message}'.encode('utf-8')) + def send_message(client: socket.socket, message: str = ""): + client.send(f"HTTP/1.1 200 OK\r\n\r\n{message}".encode("utf-8")) client.close() @@ -94,14 +95,14 @@ class OAuth2TokenManager(praw.reddit.BaseTokenManager): def pre_refresh_callback(self, authorizer: praw.reddit.Authorizer): if authorizer.refresh_token is None: - if self.config.has_option('DEFAULT', 'user_token'): - authorizer.refresh_token = self.config.get('DEFAULT', 'user_token') - logger.log(9, 'Loaded OAuth2 token for authoriser') + if self.config.has_option("DEFAULT", "user_token"): + authorizer.refresh_token = self.config.get("DEFAULT", "user_token") + logger.log(9, "Loaded OAuth2 token for authoriser") else: - raise RedditAuthenticationError('No auth token loaded in configuration') + raise RedditAuthenticationError("No auth token loaded in configuration") def post_refresh_callback(self, authorizer: praw.reddit.Authorizer): - self.config.set('DEFAULT', 'user_token', authorizer.refresh_token) - with open(self.config_location, 'w') as file: + self.config.set("DEFAULT", "user_token", authorizer.refresh_token) + with open(self.config_location, "w") as file: self.config.write(file, True) - logger.log(9, f'Written OAuth2 token from authoriser to {self.config_location}') + logger.log(9, f"Written OAuth2 token from authoriser to {self.config_location}") diff --git a/bdfr/resource.py b/bdfr/resource.py index 68a42e1..0f5404c 100644 --- a/bdfr/resource.py +++ b/bdfr/resource.py @@ -39,7 +39,7 @@ class Resource: try: content = self.download_function(download_parameters) except requests.exceptions.ConnectionError as e: - raise BulkDownloaderException(f'Could not download resource: {e}') + raise BulkDownloaderException(f"Could not download resource: {e}") except BulkDownloaderException: raise if content: @@ -51,7 +51,7 @@ class Resource: self.hash = hashlib.md5(self.content) def 
_determine_extension(self) -> Optional[str]: - extension_pattern = re.compile(r'.*(\..{3,5})$') + extension_pattern = re.compile(r".*(\..{3,5})$") stripped_url = urllib.parse.urlsplit(self.url).path match = re.search(extension_pattern, stripped_url) if match: @@ -59,27 +59,28 @@ class Resource: @staticmethod def http_download(url: str, download_parameters: dict) -> Optional[bytes]: - headers = download_parameters.get('headers') + headers = download_parameters.get("headers") current_wait_time = 60 - if 'max_wait_time' in download_parameters: - max_wait_time = download_parameters['max_wait_time'] + if "max_wait_time" in download_parameters: + max_wait_time = download_parameters["max_wait_time"] else: max_wait_time = 300 while True: try: response = requests.get(url, headers=headers) - if re.match(r'^2\d{2}', str(response.status_code)) and response.content: + if re.match(r"^2\d{2}", str(response.status_code)) and response.content: return response.content elif response.status_code in (408, 429): - raise requests.exceptions.ConnectionError(f'Response code {response.status_code}') + raise requests.exceptions.ConnectionError(f"Response code {response.status_code}") else: raise BulkDownloaderException( - f'Unrecoverable error requesting resource: HTTP Code {response.status_code}') + f"Unrecoverable error requesting resource: HTTP Code {response.status_code}" + ) except (requests.exceptions.ConnectionError, requests.exceptions.ChunkedEncodingError) as e: - logger.warning(f'Error occured downloading from {url}, waiting {current_wait_time} seconds: {e}') + logger.warning(f"Error occured downloading from {url}, waiting {current_wait_time} seconds: {e}") time.sleep(current_wait_time) if current_wait_time < max_wait_time: current_wait_time += 60 else: - logger.error(f'Max wait time exceeded for resource at url {url}') + logger.error(f"Max wait time exceeded for resource at url {url}") raise diff --git a/bdfr/site_downloaders/base_downloader.py b/bdfr/site_downloaders/base_downloader.py index 10787b8..f3ecec5 100644 --- a/bdfr/site_downloaders/base_downloader.py +++ b/bdfr/site_downloaders/base_downloader.py @@ -31,7 +31,7 @@ class BaseDownloader(ABC): res = requests.get(url, cookies=cookies, headers=headers) except requests.exceptions.RequestException as e: logger.exception(e) - raise SiteDownloaderError(f'Failed to get page {url}') + raise SiteDownloaderError(f"Failed to get page {url}") if res.status_code != 200: - raise ResourceNotFound(f'Server responded with {res.status_code} to {url}') + raise ResourceNotFound(f"Server responded with {res.status_code} to {url}") return res diff --git a/bdfr/site_downloaders/delay_for_reddit.py b/bdfr/site_downloaders/delay_for_reddit.py index 149e403..3380731 100644 --- a/bdfr/site_downloaders/delay_for_reddit.py +++ b/bdfr/site_downloaders/delay_for_reddit.py @@ -5,8 +5,8 @@ from typing import Optional from praw.models import Submission -from bdfr.site_authenticator import SiteAuthenticator from bdfr.resource import Resource +from bdfr.site_authenticator import SiteAuthenticator from bdfr.site_downloaders.base_downloader import BaseDownloader logger = logging.getLogger(__name__) diff --git a/bdfr/site_downloaders/direct.py b/bdfr/site_downloaders/direct.py index 833acae..4a6ac92 100644 --- a/bdfr/site_downloaders/direct.py +++ b/bdfr/site_downloaders/direct.py @@ -4,8 +4,8 @@ from typing import Optional from praw.models import Submission -from bdfr.site_authenticator import SiteAuthenticator from bdfr.resource import Resource +from bdfr.site_authenticator import 
SiteAuthenticator from bdfr.site_downloaders.base_downloader import BaseDownloader diff --git a/bdfr/site_downloaders/download_factory.py b/bdfr/site_downloaders/download_factory.py index 75beeae..638316f 100644 --- a/bdfr/site_downloaders/download_factory.py +++ b/bdfr/site_downloaders/download_factory.py @@ -26,62 +26,63 @@ class DownloadFactory: @staticmethod def pull_lever(url: str) -> Type[BaseDownloader]: sanitised_url = DownloadFactory.sanitise_url(url) - if re.match(r'(i\.|m\.)?imgur', sanitised_url): + if re.match(r"(i\.|m\.)?imgur", sanitised_url): return Imgur - elif re.match(r'(i\.)?(redgifs|gifdeliverynetwork)', sanitised_url): + elif re.match(r"(i\.)?(redgifs|gifdeliverynetwork)", sanitised_url): return Redgifs - elif re.match(r'.*/.*\.\w{3,4}(\?[\w;&=]*)?$', sanitised_url) and \ - not DownloadFactory.is_web_resource(sanitised_url): + elif re.match(r".*/.*\.\w{3,4}(\?[\w;&=]*)?$", sanitised_url) and not DownloadFactory.is_web_resource( + sanitised_url + ): return Direct - elif re.match(r'erome\.com.*', sanitised_url): + elif re.match(r"erome\.com.*", sanitised_url): return Erome - elif re.match(r'delayforreddit\.com', sanitised_url): + elif re.match(r"delayforreddit\.com", sanitised_url): return DelayForReddit - elif re.match(r'reddit\.com/gallery/.*', sanitised_url): + elif re.match(r"reddit\.com/gallery/.*", sanitised_url): return Gallery - elif re.match(r'patreon\.com.*', sanitised_url): + elif re.match(r"patreon\.com.*", sanitised_url): return Gallery - elif re.match(r'gfycat\.', sanitised_url): + elif re.match(r"gfycat\.", sanitised_url): return Gfycat - elif re.match(r'reddit\.com/r/', sanitised_url): + elif re.match(r"reddit\.com/r/", sanitised_url): return SelfPost - elif re.match(r'(m\.)?youtu\.?be', sanitised_url): + elif re.match(r"(m\.)?youtu\.?be", sanitised_url): return Youtube - elif re.match(r'i\.redd\.it.*', sanitised_url): + elif re.match(r"i\.redd\.it.*", sanitised_url): return Direct - elif re.match(r'v\.redd\.it.*', sanitised_url): + elif re.match(r"v\.redd\.it.*", sanitised_url): return VReddit - elif re.match(r'pornhub\.com.*', sanitised_url): + elif re.match(r"pornhub\.com.*", sanitised_url): return PornHub - elif re.match(r'vidble\.com', sanitised_url): + elif re.match(r"vidble\.com", sanitised_url): return Vidble elif YtdlpFallback.can_handle_link(sanitised_url): return YtdlpFallback else: - raise NotADownloadableLinkError(f'No downloader module exists for url {url}') + raise NotADownloadableLinkError(f"No downloader module exists for url {url}") @staticmethod def sanitise_url(url: str) -> str: - beginning_regex = re.compile(r'\s*(www\.?)?') + beginning_regex = re.compile(r"\s*(www\.?)?") split_url = urllib.parse.urlsplit(url) split_url = split_url.netloc + split_url.path - split_url = re.sub(beginning_regex, '', split_url) + split_url = re.sub(beginning_regex, "", split_url) return split_url @staticmethod def is_web_resource(url: str) -> bool: web_extensions = ( - 'asp', - 'aspx', - 'cfm', - 'cfml', - 'css', - 'htm', - 'html', - 'js', - 'php', - 'php3', - 'xhtml', + "asp", + "aspx", + "cfm", + "cfml", + "css", + "htm", + "html", + "js", + "php", + "php3", + "xhtml", ) if re.match(rf'(?i).*/.*\.({"|".join(web_extensions)})$', url): return True diff --git a/bdfr/site_downloaders/erome.py b/bdfr/site_downloaders/erome.py index 6250415..26469bc 100644 --- a/bdfr/site_downloaders/erome.py +++ b/bdfr/site_downloaders/erome.py @@ -23,34 +23,34 @@ class Erome(BaseDownloader): links = self._get_links(self.post.url) if not links: - raise 
SiteDownloaderError('Erome parser could not find any links') + raise SiteDownloaderError("Erome parser could not find any links") out = [] for link in links: - if not re.match(r'https?://.*', link): - link = 'https://' + link + if not re.match(r"https?://.*", link): + link = "https://" + link out.append(Resource(self.post, link, self.erome_download(link))) return out @staticmethod def _get_links(url: str) -> set[str]: page = Erome.retrieve_url(url) - soup = bs4.BeautifulSoup(page.text, 'html.parser') - front_images = soup.find_all('img', attrs={'class': 'lasyload'}) - out = [im.get('data-src') for im in front_images] + soup = bs4.BeautifulSoup(page.text, "html.parser") + front_images = soup.find_all("img", attrs={"class": "lasyload"}) + out = [im.get("data-src") for im in front_images] - videos = soup.find_all('source') - out.extend([vid.get('src') for vid in videos]) + videos = soup.find_all("source") + out.extend([vid.get("src") for vid in videos]) return set(out) @staticmethod def erome_download(url: str) -> Callable: download_parameters = { - 'headers': { - 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)' - ' Chrome/88.0.4324.104 Safari/537.36', - 'Referer': 'https://www.erome.com/', + "headers": { + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)" + " Chrome/88.0.4324.104 Safari/537.36", + "Referer": "https://www.erome.com/", }, } return lambda global_params: Resource.http_download(url, global_params | download_parameters) diff --git a/bdfr/site_downloaders/fallback_downloaders/fallback_downloader.py b/bdfr/site_downloaders/fallback_downloaders/fallback_downloader.py index deeb213..3bc615d 100644 --- a/bdfr/site_downloaders/fallback_downloaders/fallback_downloader.py +++ b/bdfr/site_downloaders/fallback_downloaders/fallback_downloader.py @@ -7,7 +7,6 @@ from bdfr.site_downloaders.base_downloader import BaseDownloader class BaseFallbackDownloader(BaseDownloader, ABC): - @staticmethod @abstractmethod def can_handle_link(url: str) -> bool: diff --git a/bdfr/site_downloaders/fallback_downloaders/ytdlp_fallback.py b/bdfr/site_downloaders/fallback_downloaders/ytdlp_fallback.py index 1225624..6109b7a 100644 --- a/bdfr/site_downloaders/fallback_downloaders/ytdlp_fallback.py +++ b/bdfr/site_downloaders/fallback_downloaders/ytdlp_fallback.py @@ -9,7 +9,9 @@ from praw.models import Submission from bdfr.exceptions import NotADownloadableLinkError from bdfr.resource import Resource from bdfr.site_authenticator import SiteAuthenticator -from bdfr.site_downloaders.fallback_downloaders.fallback_downloader import BaseFallbackDownloader +from bdfr.site_downloaders.fallback_downloaders.fallback_downloader import ( + BaseFallbackDownloader, +) from bdfr.site_downloaders.youtube import Youtube logger = logging.getLogger(__name__) @@ -24,7 +26,7 @@ class YtdlpFallback(BaseFallbackDownloader, Youtube): self.post, self.post.url, super()._download_video({}), - super().get_video_attributes(self.post.url)['ext'], + super().get_video_attributes(self.post.url)["ext"], ) return [out] diff --git a/bdfr/site_downloaders/gallery.py b/bdfr/site_downloaders/gallery.py index eeb9e0f..278932f 100644 --- a/bdfr/site_downloaders/gallery.py +++ b/bdfr/site_downloaders/gallery.py @@ -20,27 +20,27 @@ class Gallery(BaseDownloader): def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]: try: - image_urls = self._get_links(self.post.gallery_data['items']) + image_urls = 
self._get_links(self.post.gallery_data["items"]) except (AttributeError, TypeError): try: - image_urls = self._get_links(self.post.crosspost_parent_list[0]['gallery_data']['items']) + image_urls = self._get_links(self.post.crosspost_parent_list[0]["gallery_data"]["items"]) except (AttributeError, IndexError, TypeError, KeyError): - logger.error(f'Could not find gallery data in submission {self.post.id}') - logger.exception('Gallery image find failure') - raise SiteDownloaderError('No images found in Reddit gallery') + logger.error(f"Could not find gallery data in submission {self.post.id}") + logger.exception("Gallery image find failure") + raise SiteDownloaderError("No images found in Reddit gallery") if not image_urls: - raise SiteDownloaderError('No images found in Reddit gallery') + raise SiteDownloaderError("No images found in Reddit gallery") return [Resource(self.post, url, Resource.retry_download(url)) for url in image_urls] - @ staticmethod + @staticmethod def _get_links(id_dict: list[dict]) -> list[str]: out = [] for item in id_dict: - image_id = item['media_id'] - possible_extensions = ('.jpg', '.png', '.gif', '.gifv', '.jpeg') + image_id = item["media_id"] + possible_extensions = (".jpg", ".png", ".gif", ".gifv", ".jpeg") for extension in possible_extensions: - test_url = f'https://i.redd.it/{image_id}{extension}' + test_url = f"https://i.redd.it/{image_id}{extension}" response = requests.head(test_url) if response.status_code == 200: out.append(test_url) diff --git a/bdfr/site_downloaders/gfycat.py b/bdfr/site_downloaders/gfycat.py index c8da9df..7862d33 100644 --- a/bdfr/site_downloaders/gfycat.py +++ b/bdfr/site_downloaders/gfycat.py @@ -22,21 +22,23 @@ class Gfycat(Redgifs): @staticmethod def _get_link(url: str) -> set[str]: - gfycat_id = re.match(r'.*/(.*?)/?$', url).group(1) - url = 'https://gfycat.com/' + gfycat_id + gfycat_id = re.match(r".*/(.*?)/?$", url).group(1) + url = "https://gfycat.com/" + gfycat_id response = Gfycat.retrieve_url(url) - if re.search(r'(redgifs|gifdeliverynetwork)', response.url): + if re.search(r"(redgifs|gifdeliverynetwork)", response.url): url = url.lower() # Fixes error with old gfycat/redgifs links return Redgifs._get_link(url) - soup = BeautifulSoup(response.text, 'html.parser') - content = soup.find('script', attrs={'data-react-helmet': 'true', 'type': 'application/ld+json'}) + soup = BeautifulSoup(response.text, "html.parser") + content = soup.find("script", attrs={"data-react-helmet": "true", "type": "application/ld+json"}) try: - out = json.loads(content.contents[0])['video']['contentUrl'] + out = json.loads(content.contents[0])["video"]["contentUrl"] except (IndexError, KeyError, AttributeError) as e: - raise SiteDownloaderError(f'Failed to download Gfycat link {url}: {e}') + raise SiteDownloaderError(f"Failed to download Gfycat link {url}: {e}") except json.JSONDecodeError as e: - raise SiteDownloaderError(f'Did not receive valid JSON data: {e}') - return {out,} + raise SiteDownloaderError(f"Did not receive valid JSON data: {e}") + return { + out, + } diff --git a/bdfr/site_downloaders/imgur.py b/bdfr/site_downloaders/imgur.py index 0688b10..f91e34f 100644 --- a/bdfr/site_downloaders/imgur.py +++ b/bdfr/site_downloaders/imgur.py @@ -14,7 +14,6 @@ from bdfr.site_downloaders.base_downloader import BaseDownloader class Imgur(BaseDownloader): - def __init__(self, post: Submission): super().__init__(post) self.raw_data = {} @@ -23,63 +22,63 @@ class Imgur(BaseDownloader): self.raw_data = self._get_data(self.post.url) out = [] - if 
'album_images' in self.raw_data: - images = self.raw_data['album_images'] - for image in images['images']: + if "album_images" in self.raw_data: + images = self.raw_data["album_images"] + for image in images["images"]: out.append(self._compute_image_url(image)) else: out.append(self._compute_image_url(self.raw_data)) return out def _compute_image_url(self, image: dict) -> Resource: - ext = self._validate_extension(image['ext']) - if image.get('prefer_video', False): - ext = '.mp4' + ext = self._validate_extension(image["ext"]) + if image.get("prefer_video", False): + ext = ".mp4" - image_url = 'https://i.imgur.com/' + image['hash'] + ext + image_url = "https://i.imgur.com/" + image["hash"] + ext return Resource(self.post, image_url, Resource.retry_download(image_url)) @staticmethod def _get_data(link: str) -> dict: try: - imgur_id = re.match(r'.*/(.*?)(\..{0,})?$', link).group(1) - gallery = 'a/' if re.search(r'.*/(.*?)(gallery/|a/)', link) else '' - link = f'https://imgur.com/{gallery}{imgur_id}' + imgur_id = re.match(r".*/(.*?)(\..{0,})?$", link).group(1) + gallery = "a/" if re.search(r".*/(.*?)(gallery/|a/)", link) else "" + link = f"https://imgur.com/{gallery}{imgur_id}" except AttributeError: - raise SiteDownloaderError(f'Could not extract Imgur ID from {link}') + raise SiteDownloaderError(f"Could not extract Imgur ID from {link}") - res = Imgur.retrieve_url(link, cookies={'over18': '1', 'postpagebeta': '0'}) + res = Imgur.retrieve_url(link, cookies={"over18": "1", "postpagebeta": "0"}) - soup = bs4.BeautifulSoup(res.text, 'html.parser') - scripts = soup.find_all('script', attrs={'type': 'text/javascript'}) - scripts = [script.string.replace('\n', '') for script in scripts if script.string] + soup = bs4.BeautifulSoup(res.text, "html.parser") + scripts = soup.find_all("script", attrs={"type": "text/javascript"}) + scripts = [script.string.replace("\n", "") for script in scripts if script.string] - script_regex = re.compile(r'\s*\(function\(widgetFactory\)\s*{\s*widgetFactory\.mergeConfig\(\'gallery\'') + script_regex = re.compile(r"\s*\(function\(widgetFactory\)\s*{\s*widgetFactory\.mergeConfig\(\'gallery\'") chosen_script = list(filter(lambda s: re.search(script_regex, s), scripts)) if len(chosen_script) != 1: - raise SiteDownloaderError(f'Could not read page source from {link}') + raise SiteDownloaderError(f"Could not read page source from {link}") chosen_script = chosen_script[0] - outer_regex = re.compile(r'widgetFactory\.mergeConfig\(\'gallery\', ({.*})\);') - inner_regex = re.compile(r'image\s*:(.*),\s*group') + outer_regex = re.compile(r"widgetFactory\.mergeConfig\(\'gallery\', ({.*})\);") + inner_regex = re.compile(r"image\s*:(.*),\s*group") try: image_dict = re.search(outer_regex, chosen_script).group(1) image_dict = re.search(inner_regex, image_dict).group(1) except AttributeError: - raise SiteDownloaderError(f'Could not find image dictionary in page source') + raise SiteDownloaderError(f"Could not find image dictionary in page source") try: image_dict = json.loads(image_dict) except json.JSONDecodeError as e: - raise SiteDownloaderError(f'Could not parse received dict as JSON: {e}') + raise SiteDownloaderError(f"Could not parse received dict as JSON: {e}") return image_dict @staticmethod def _validate_extension(extension_suffix: str) -> str: - extension_suffix = re.sub(r'\?.*', '', extension_suffix) - possible_extensions = ('.jpg', '.png', '.mp4', '.gif') + extension_suffix = re.sub(r"\?.*", "", extension_suffix) + possible_extensions = (".jpg", ".png", ".mp4", ".gif") 
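        # (annotation, not part of the upstream diff) The comprehension below is
        # an allow-list check: the query-stripped suffix survives only on an
        # exact match with one of the known media extensions above.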
selection = [ext for ext in possible_extensions if ext == extension_suffix] if len(selection) == 1: return selection[0] diff --git a/bdfr/site_downloaders/pornhub.py b/bdfr/site_downloaders/pornhub.py index 748454e..db37720 100644 --- a/bdfr/site_downloaders/pornhub.py +++ b/bdfr/site_downloaders/pornhub.py @@ -20,11 +20,11 @@ class PornHub(Youtube): def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]: ytdl_options = { - 'format': 'best', - 'nooverwrites': True, + "format": "best", + "nooverwrites": True, } if video_attributes := super().get_video_attributes(self.post.url): - extension = video_attributes['ext'] + extension = video_attributes["ext"] else: raise SiteDownloaderError() diff --git a/bdfr/site_downloaders/redgifs.py b/bdfr/site_downloaders/redgifs.py index dd19413..625cf7d 100644 --- a/bdfr/site_downloaders/redgifs.py +++ b/bdfr/site_downloaders/redgifs.py @@ -2,9 +2,9 @@ import json import re -import requests from typing import Optional +import requests from praw.models import Submission from bdfr.exceptions import SiteDownloaderError @@ -24,52 +24,53 @@ class Redgifs(BaseDownloader): @staticmethod def _get_link(url: str) -> set[str]: try: - redgif_id = re.match(r'.*/(.*?)(\..{0,})?$', url).group(1) + redgif_id = re.match(r".*/(.*?)(\..{0,})?$", url).group(1) except AttributeError: - raise SiteDownloaderError(f'Could not extract Redgifs ID from {url}') + raise SiteDownloaderError(f"Could not extract Redgifs ID from {url}") - auth_token = json.loads(Redgifs.retrieve_url('https://api.redgifs.com/v2/auth/temporary').text)['token'] + auth_token = json.loads(Redgifs.retrieve_url("https://api.redgifs.com/v2/auth/temporary").text)["token"] if not auth_token: - raise SiteDownloaderError('Unable to retrieve Redgifs API token') + raise SiteDownloaderError("Unable to retrieve Redgifs API token") headers = { - 'referer': 'https://www.redgifs.com/', - 'origin': 'https://www.redgifs.com', - 'content-type': 'application/json', - 'Authorization': f'Bearer {auth_token}', + "referer": "https://www.redgifs.com/", + "origin": "https://www.redgifs.com", + "content-type": "application/json", + "Authorization": f"Bearer {auth_token}", } - content = Redgifs.retrieve_url(f'https://api.redgifs.com/v2/gifs/{redgif_id}', headers=headers) + content = Redgifs.retrieve_url(f"https://api.redgifs.com/v2/gifs/{redgif_id}", headers=headers) if content is None: - raise SiteDownloaderError('Could not read the page source') + raise SiteDownloaderError("Could not read the page source") try: response_json = json.loads(content.text) except json.JSONDecodeError as e: - raise SiteDownloaderError(f'Received data was not valid JSON: {e}') + raise SiteDownloaderError(f"Received data was not valid JSON: {e}") out = set() try: - if response_json['gif']['type'] == 1: # type 1 is a video - if requests.get(response_json['gif']['urls']['hd'], headers=headers).ok: - out.add(response_json['gif']['urls']['hd']) + if response_json["gif"]["type"] == 1: # type 1 is a video + if requests.get(response_json["gif"]["urls"]["hd"], headers=headers).ok: + out.add(response_json["gif"]["urls"]["hd"]) else: - out.add(response_json['gif']['urls']['sd']) - elif response_json['gif']['type'] == 2: # type 2 is an image - if response_json['gif']['gallery']: + out.add(response_json["gif"]["urls"]["sd"]) + elif response_json["gif"]["type"] == 2: # type 2 is an image + if response_json["gif"]["gallery"]: content = Redgifs.retrieve_url( - f'https://api.redgifs.com/v2/gallery/{response_json["gif"]["gallery"]}') + 
f'https://api.redgifs.com/v2/gallery/{response_json["gif"]["gallery"]}' + ) response_json = json.loads(content.text) - out = {p['urls']['hd'] for p in response_json['gifs']} + out = {p["urls"]["hd"] for p in response_json["gifs"]} else: - out.add(response_json['gif']['urls']['hd']) + out.add(response_json["gif"]["urls"]["hd"]) else: raise KeyError except (KeyError, AttributeError): - raise SiteDownloaderError('Failed to find JSON data in page') + raise SiteDownloaderError("Failed to find JSON data in page") # Update subdomain if old one is returned - out = {re.sub('thumbs2', 'thumbs3', link) for link in out} - out = {re.sub('thumbs3', 'thumbs4', link) for link in out} + out = {re.sub("thumbs2", "thumbs3", link) for link in out} + out = {re.sub("thumbs3", "thumbs4", link) for link in out} return out diff --git a/bdfr/site_downloaders/self_post.py b/bdfr/site_downloaders/self_post.py index 6e4ce0e..1b76b92 100644 --- a/bdfr/site_downloaders/self_post.py +++ b/bdfr/site_downloaders/self_post.py @@ -17,27 +17,29 @@ class SelfPost(BaseDownloader): super().__init__(post) def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]: - out = Resource(self.post, self.post.url, lambda: None, '.txt') - out.content = self.export_to_string().encode('utf-8') + out = Resource(self.post, self.post.url, lambda: None, ".txt") + out.content = self.export_to_string().encode("utf-8") out.create_hash() return [out] def export_to_string(self) -> str: """Self posts are formatted here""" - content = ("## [" - + self.post.fullname - + "](" - + self.post.url - + ")\n" - + self.post.selftext - + "\n\n---\n\n" - + "submitted to [r/" - + self.post.subreddit.title - + "](https://www.reddit.com/r/" - + self.post.subreddit.title - + ") by [u/" - + (self.post.author.name if self.post.author else "DELETED") - + "](https://www.reddit.com/user/" - + (self.post.author.name if self.post.author else "DELETED") - + ")") + content = ( + "## [" + + self.post.fullname + + "](" + + self.post.url + + ")\n" + + self.post.selftext + + "\n\n---\n\n" + + "submitted to [r/" + + self.post.subreddit.title + + "](https://www.reddit.com/r/" + + self.post.subreddit.title + + ") by [u/" + + (self.post.author.name if self.post.author else "DELETED") + + "](https://www.reddit.com/user/" + + (self.post.author.name if self.post.author else "DELETED") + + ")" + ) return content diff --git a/bdfr/site_downloaders/vidble.py b/bdfr/site_downloaders/vidble.py index 5cea0cb..a79ee25 100644 --- a/bdfr/site_downloaders/vidble.py +++ b/bdfr/site_downloaders/vidble.py @@ -25,30 +25,30 @@ class Vidble(BaseDownloader): try: res = self.get_links(self.post.url) except AttributeError: - raise SiteDownloaderError(f'Could not read page at {self.post.url}') + raise SiteDownloaderError(f"Could not read page at {self.post.url}") if not res: - raise SiteDownloaderError(rf'No resources found at {self.post.url}') + raise SiteDownloaderError(rf"No resources found at {self.post.url}") res = [Resource(self.post, r, Resource.retry_download(r)) for r in res] return res @staticmethod def get_links(url: str) -> set[str]: - if not re.search(r'vidble.com/(show/|album/|watch\?v)', url): - url = re.sub(r'/(\w*?)$', r'/show/\1', url) + if not re.search(r"vidble.com/(show/|album/|watch\?v)", url): + url = re.sub(r"/(\w*?)$", r"/show/\1", url) page = requests.get(url) - soup = bs4.BeautifulSoup(page.text, 'html.parser') - content_div = soup.find('div', attrs={'id': 'ContentPlaceHolder1_divContent'}) - images = content_div.find_all('img') - images = 
[i.get('src') for i in images] - videos = content_div.find_all('source', attrs={'type': 'video/mp4'}) - videos = [v.get('src') for v in videos] + soup = bs4.BeautifulSoup(page.text, "html.parser") + content_div = soup.find("div", attrs={"id": "ContentPlaceHolder1_divContent"}) + images = content_div.find_all("img") + images = [i.get("src") for i in images] + videos = content_div.find_all("source", attrs={"type": "video/mp4"}) + videos = [v.get("src") for v in videos] resources = filter(None, itertools.chain(images, videos)) - resources = ['https://www.vidble.com' + r for r in resources] + resources = ["https://www.vidble.com" + r for r in resources] resources = [Vidble.change_med_url(r) for r in resources] return set(resources) @staticmethod def change_med_url(url: str) -> str: - out = re.sub(r'_med(\..{3,4})$', r'\1', url) + out = re.sub(r"_med(\..{3,4})$", r"\1", url) return out diff --git a/bdfr/site_downloaders/vreddit.py b/bdfr/site_downloaders/vreddit.py index ad526b4..a71d350 100644 --- a/bdfr/site_downloaders/vreddit.py +++ b/bdfr/site_downloaders/vreddit.py @@ -22,18 +22,18 @@ class VReddit(Youtube): def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]: ytdl_options = { - 'playlistend': 1, - 'nooverwrites': True, + "playlistend": 1, + "nooverwrites": True, } download_function = self._download_video(ytdl_options) - extension = self.get_video_attributes(self.post.url)['ext'] + extension = self.get_video_attributes(self.post.url)["ext"] res = Resource(self.post, self.post.url, download_function, extension) return [res] @staticmethod def get_video_attributes(url: str) -> dict: result = VReddit.get_video_data(url) - if 'ext' in result: + if "ext" in result: return result else: try: @@ -41,4 +41,4 @@ class VReddit(Youtube): return result except Exception as e: logger.exception(e) - raise NotADownloadableLinkError(f'Video info extraction failed for {url}') + raise NotADownloadableLinkError(f"Video info extraction failed for {url}") diff --git a/bdfr/site_downloaders/youtube.py b/bdfr/site_downloaders/youtube.py index 315fd0a..f4f8622 100644 --- a/bdfr/site_downloaders/youtube.py +++ b/bdfr/site_downloaders/youtube.py @@ -22,57 +22,62 @@ class Youtube(BaseDownloader): def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]: ytdl_options = { - 'format': 'best', - 'playlistend': 1, - 'nooverwrites': True, + "format": "best", + "playlistend": 1, + "nooverwrites": True, } download_function = self._download_video(ytdl_options) - extension = self.get_video_attributes(self.post.url)['ext'] + extension = self.get_video_attributes(self.post.url)["ext"] res = Resource(self.post, self.post.url, download_function, extension) return [res] def _download_video(self, ytdl_options: dict) -> Callable: - yt_logger = logging.getLogger('youtube-dl') + yt_logger = logging.getLogger("youtube-dl") yt_logger.setLevel(logging.CRITICAL) - ytdl_options['quiet'] = True - ytdl_options['logger'] = yt_logger + ytdl_options["quiet"] = True + ytdl_options["logger"] = yt_logger def download(_: dict) -> bytes: with tempfile.TemporaryDirectory() as temp_dir: download_path = Path(temp_dir).resolve() - ytdl_options['outtmpl'] = str(download_path) + '/' + 'test.%(ext)s' + ytdl_options["outtmpl"] = str(download_path) + "/" + "test.%(ext)s" try: with yt_dlp.YoutubeDL(ytdl_options) as ydl: ydl.download([self.post.url]) except yt_dlp.DownloadError as e: - raise SiteDownloaderError(f'Youtube download failed: {e}') + raise SiteDownloaderError(f"Youtube 
download failed: {e}") downloaded_files = list(download_path.iterdir()) if downloaded_files: downloaded_file = downloaded_files[0] else: raise NotADownloadableLinkError(f"No media exists in the URL {self.post.url}") - with downloaded_file.open('rb') as file: + with downloaded_file.open("rb") as file: content = file.read() return content + return download @staticmethod def get_video_data(url: str) -> dict: - yt_logger = logging.getLogger('youtube-dl') + yt_logger = logging.getLogger("youtube-dl") yt_logger.setLevel(logging.CRITICAL) - with yt_dlp.YoutubeDL({'logger': yt_logger, }) as ydl: + with yt_dlp.YoutubeDL( + { + "logger": yt_logger, + } + ) as ydl: try: result = ydl.extract_info(url, download=False) except Exception as e: logger.exception(e) - raise NotADownloadableLinkError(f'Video info extraction failed for {url}') + raise NotADownloadableLinkError(f"Video info extraction failed for {url}") return result @staticmethod def get_video_attributes(url: str) -> dict: result = Youtube.get_video_data(url) - if 'ext' in result: + if "ext" in result: return result else: - raise NotADownloadableLinkError(f'Video info extraction failed for {url}') + raise NotADownloadableLinkError(f"Video info extraction failed for {url}") diff --git a/scripts/tests/bats b/scripts/tests/bats index e8c840b..ce5ca28 160000 --- a/scripts/tests/bats +++ b/scripts/tests/bats @@ -1 +1 @@ -Subproject commit e8c840b58f0833e23461c682655fe540aa923f85 +Subproject commit ce5ca2802fabe5dc38393240cd40e20f8928d3b0 diff --git a/scripts/tests/test_helper/bats-assert b/scripts/tests/test_helper/bats-assert index 78fa631..e0de84e 160000 --- a/scripts/tests/test_helper/bats-assert +++ b/scripts/tests/test_helper/bats-assert @@ -1 +1 @@ -Subproject commit 78fa631d1370562d2cd4a1390989e706158e7bf0 +Subproject commit e0de84e9c011223e7f88b7ccf1c929f4327097ba diff --git a/tests/archive_entry/test_comment_archive_entry.py b/tests/archive_entry/test_comment_archive_entry.py index e453d27..8e6f224 100644 --- a/tests/archive_entry/test_comment_archive_entry.py +++ b/tests/archive_entry/test_comment_archive_entry.py @@ -9,15 +9,21 @@ from bdfr.archive_entry.comment_archive_entry import CommentArchiveEntry @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_comment_id', 'expected_dict'), ( - ('gstd4hk', { - 'author': 'james_pic', - 'subreddit': 'Python', - 'submission': 'mgi4op', - 'submission_title': '76% Faster CPython', - 'distinguished': None, - }), -)) +@pytest.mark.parametrize( + ("test_comment_id", "expected_dict"), + ( + ( + "gstd4hk", + { + "author": "james_pic", + "subreddit": "Python", + "submission": "mgi4op", + "submission_title": "76% Faster CPython", + "distinguished": None, + }, + ), + ), +) def test_get_comment_details(test_comment_id: str, expected_dict: dict, reddit_instance: praw.Reddit): comment = reddit_instance.comment(id=test_comment_id) test_entry = CommentArchiveEntry(comment) @@ -27,13 +33,16 @@ def test_get_comment_details(test_comment_id: str, expected_dict: dict, reddit_i @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_comment_id', 'expected_min_comments'), ( - ('gstd4hk', 4), - ('gsvyste', 3), - ('gsxnvvb', 5), -)) +@pytest.mark.parametrize( + ("test_comment_id", "expected_min_comments"), + ( + ("gstd4hk", 4), + ("gsvyste", 3), + ("gsxnvvb", 5), + ), +) def test_get_comment_replies(test_comment_id: str, expected_min_comments: int, reddit_instance: praw.Reddit): comment = reddit_instance.comment(id=test_comment_id) test_entry = CommentArchiveEntry(comment) result = 
test_entry.compile() - assert len(result.get('replies')) >= expected_min_comments + assert len(result.get("replies")) >= expected_min_comments diff --git a/tests/archive_entry/test_submission_archive_entry.py b/tests/archive_entry/test_submission_archive_entry.py index 045eabd..666eec3 100644 --- a/tests/archive_entry/test_submission_archive_entry.py +++ b/tests/archive_entry/test_submission_archive_entry.py @@ -9,9 +9,7 @@ from bdfr.archive_entry.submission_archive_entry import SubmissionArchiveEntry @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_submission_id', 'min_comments'), ( - ('m3reby', 27), -)) +@pytest.mark.parametrize(("test_submission_id", "min_comments"), (("m3reby", 27),)) def test_get_comments(test_submission_id: str, min_comments: int, reddit_instance: praw.Reddit): test_submission = reddit_instance.submission(id=test_submission_id) test_archive_entry = SubmissionArchiveEntry(test_submission) @@ -21,21 +19,27 @@ def test_get_comments(test_submission_id: str, min_comments: int, reddit_instanc @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_submission_id', 'expected_dict'), ( - ('m3reby', { - 'author': 'sinjen-tos', - 'id': 'm3reby', - 'link_flair_text': 'image', - 'pinned': False, - 'spoiler': False, - 'over_18': False, - 'locked': False, - 'distinguished': None, - 'created_utc': 1615583837, - 'permalink': '/r/australia/comments/m3reby/this_little_guy_fell_out_of_a_tree_and_in_front/' - }), - # TODO: add deleted user test case -)) +@pytest.mark.parametrize( + ("test_submission_id", "expected_dict"), + ( + ( + "m3reby", + { + "author": "sinjen-tos", + "id": "m3reby", + "link_flair_text": "image", + "pinned": False, + "spoiler": False, + "over_18": False, + "locked": False, + "distinguished": None, + "created_utc": 1615583837, + "permalink": "/r/australia/comments/m3reby/this_little_guy_fell_out_of_a_tree_and_in_front/", + }, + ), + # TODO: add deleted user test case + ), +) def test_get_post_details(test_submission_id: str, expected_dict: dict, reddit_instance: praw.Reddit): test_submission = reddit_instance.submission(id=test_submission_id) test_archive_entry = SubmissionArchiveEntry(test_submission) diff --git a/tests/conftest.py b/tests/conftest.py index a61d8d5..3f871a3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,29 +11,29 @@ import pytest from bdfr.oauth2 import OAuth2TokenManager -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def reddit_instance(): rd = praw.Reddit( - client_id='U-6gk4ZCh3IeNQ', - client_secret='7CZHY6AmKweZME5s50SfDGylaPg', - user_agent='test', + client_id="U-6gk4ZCh3IeNQ", + client_secret="7CZHY6AmKweZME5s50SfDGylaPg", + user_agent="test", ) return rd -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def authenticated_reddit_instance(): - test_config_path = Path('./tests/test_config.cfg') + test_config_path = Path("./tests/test_config.cfg") if not test_config_path.exists(): - pytest.skip('Refresh token must be provided to authenticate with OAuth2') + pytest.skip("Refresh token must be provided to authenticate with OAuth2") cfg_parser = configparser.ConfigParser() cfg_parser.read(test_config_path) - if not cfg_parser.has_option('DEFAULT', 'user_token'): - pytest.skip('Refresh token must be provided to authenticate with OAuth2') + if not cfg_parser.has_option("DEFAULT", "user_token"): + pytest.skip("Refresh token must be provided to authenticate with OAuth2") token_manager = OAuth2TokenManager(cfg_parser, test_config_path) reddit_instance = 
praw.Reddit( - client_id=cfg_parser.get('DEFAULT', 'client_id'), - client_secret=cfg_parser.get('DEFAULT', 'client_secret'), + client_id=cfg_parser.get("DEFAULT", "client_id"), + client_secret=cfg_parser.get("DEFAULT", "client_secret"), user_agent=socket.gethostname(), token_manager=token_manager, ) diff --git a/tests/integration_tests/test_archive_integration.py b/tests/integration_tests/test_archive_integration.py index caf6fcb..f10f37c 100644 --- a/tests/integration_tests/test_archive_integration.py +++ b/tests/integration_tests/test_archive_integration.py @@ -10,67 +10,78 @@ from click.testing import CliRunner from bdfr.__main__ import cli -does_test_config_exist = Path('./tests/test_config.cfg').exists() +does_test_config_exist = Path("./tests/test_config.cfg").exists() def copy_test_config(run_path: Path): - shutil.copy(Path('./tests/test_config.cfg'), Path(run_path, 'test_config.cfg')) + shutil.copy(Path("./tests/test_config.cfg"), Path(run_path, "test_config.cfg")) def create_basic_args_for_archive_runner(test_args: list[str], run_path: Path): copy_test_config(run_path) out = [ - 'archive', + "archive", str(run_path), - '-v', - '--config', str(Path(run_path, 'test_config.cfg')), - '--log', str(Path(run_path, 'test_log.txt')), + "-v", + "--config", + str(Path(run_path, "test_config.cfg")), + "--log", + str(Path(run_path, "test_log.txt")), ] + test_args return out @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['-l', 'gstd4hk'], - ['-l', 'm2601g', '-f', 'yaml'], - ['-l', 'n60t4c', '-f', 'xml'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + "test_args", + ( + ["-l", "gstd4hk"], + ["-l", "m2601g", "-f", "yaml"], + ["-l", "n60t4c", "-f", "xml"], + ), +) def test_cli_archive_single(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_archive_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert re.search(r'Writing entry .*? to file in .*? format', result.output) + assert re.search(r"Writing entry .*? to file in .*? 
format", result.output) @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['--subreddit', 'Mindustry', '-L', 25], - ['--subreddit', 'Mindustry', '-L', 25, '--format', 'xml'], - ['--subreddit', 'Mindustry', '-L', 25, '--format', 'yaml'], - ['--subreddit', 'Mindustry', '-L', 25, '--sort', 'new'], - ['--subreddit', 'Mindustry', '-L', 25, '--time', 'day'], - ['--subreddit', 'Mindustry', '-L', 25, '--time', 'day', '--sort', 'new'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + "test_args", + ( + ["--subreddit", "Mindustry", "-L", 25], + ["--subreddit", "Mindustry", "-L", 25, "--format", "xml"], + ["--subreddit", "Mindustry", "-L", 25, "--format", "yaml"], + ["--subreddit", "Mindustry", "-L", 25, "--sort", "new"], + ["--subreddit", "Mindustry", "-L", 25, "--time", "day"], + ["--subreddit", "Mindustry", "-L", 25, "--time", "day", "--sort", "new"], + ), +) def test_cli_archive_subreddit(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_archive_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert re.search(r'Writing entry .*? to file in .*? format', result.output) + assert re.search(r"Writing entry .*? to file in .*? format", result.output) @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['--user', 'me', '--authenticate', '--all-comments', '-L', '10'], - ['--user', 'me', '--user', 'djnish', '--authenticate', '--all-comments', '-L', '10'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + "test_args", + ( + ["--user", "me", "--authenticate", "--all-comments", "-L", "10"], + ["--user", "me", "--user", "djnish", "--authenticate", "--all-comments", "-L", "10"], + ), +) def test_cli_archive_all_user_comments(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_archive_runner(test_args, tmp_path) @@ -80,89 +91,88 @@ def test_cli_archive_all_user_comments(test_args: list[str], tmp_path: Path): @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['--comment-context', '--link', 'gxqapql'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize("test_args", (["--comment-context", "--link", "gxqapql"],)) def test_cli_archive_full_context(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_archive_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'Converting comment' in result.output + assert "Converting comment" in result.output @pytest.mark.online @pytest.mark.reddit @pytest.mark.slow -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['--subreddit', 'all', '-L', 100], - ['--subreddit', 'all', '-L', 100, '--sort', 'new'], -)) +@pytest.mark.skipif(not does_test_config_exist, 
reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + "test_args", + ( + ["--subreddit", "all", "-L", 100], + ["--subreddit", "all", "-L", 100, "--sort", "new"], + ), +) def test_cli_archive_long(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_archive_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert re.search(r'Writing entry .*? to file in .*? format', result.output) + assert re.search(r"Writing entry .*? to file in .*? format", result.output) @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['--ignore-user', 'ArjanEgges', '-l', 'm3hxzd'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize("test_args", (["--ignore-user", "ArjanEgges", "-l", "m3hxzd"],)) def test_cli_archive_ignore_user(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_archive_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'being an ignored user' in result.output - assert 'Attempting to archive submission' not in result.output + assert "being an ignored user" in result.output + assert "Attempting to archive submission" not in result.output @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['--file-scheme', '{TITLE}', '-l', 'suy011'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize("test_args", (["--file-scheme", "{TITLE}", "-l", "suy011"],)) def test_cli_archive_file_format(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_archive_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'Attempting to archive submission' in result.output - assert re.search('format at /.+?/Judge says Trump and two adult', result.output) + assert "Attempting to archive submission" in result.output + assert re.search("format at /.+?/Judge says Trump and two adult", result.output) @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['-l', 'm2601g', '--exclude-id', 'm2601g'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize("test_args", (["-l", "m2601g", "--exclude-id", "m2601g"],)) def test_cli_archive_links_exclusion(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_archive_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'in exclusion list' in result.output - assert 'Attempting to archive' not in result.output + assert "in exclusion list" in result.output + assert "Attempting to archive" not in result.output + @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['-l', 'ijy4ch'], 
# user deleted post - ['-l', 'kw4wjm'], # post from banned subreddit -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + "test_args", + ( + ["-l", "ijy4ch"], # user deleted post + ["-l", "kw4wjm"], # post from banned subreddit + ), +) def test_cli_archive_soft_fail(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_archive_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'failed to be archived due to a PRAW exception' in result.output - assert 'Attempting to archive' not in result.output + assert "failed to be archived due to a PRAW exception" in result.output + assert "Attempting to archive" not in result.output diff --git a/tests/integration_tests/test_clone_integration.py b/tests/integration_tests/test_clone_integration.py index 8046687..e8dc008 100644 --- a/tests/integration_tests/test_clone_integration.py +++ b/tests/integration_tests/test_clone_integration.py @@ -9,54 +9,62 @@ from click.testing import CliRunner from bdfr.__main__ import cli -does_test_config_exist = Path('./tests/test_config.cfg').exists() +does_test_config_exist = Path("./tests/test_config.cfg").exists() def copy_test_config(run_path: Path): - shutil.copy(Path('./tests/test_config.cfg'), Path(run_path, 'test_config.cfg')) + shutil.copy(Path("./tests/test_config.cfg"), Path(run_path, "test_config.cfg")) def create_basic_args_for_cloner_runner(test_args: list[str], tmp_path: Path): copy_test_config(tmp_path) out = [ - 'clone', + "clone", str(tmp_path), - '-v', - '--config', str(Path(tmp_path, 'test_config.cfg')), - '--log', str(Path(tmp_path, 'test_log.txt')), + "-v", + "--config", + str(Path(tmp_path, "test_config.cfg")), + "--log", + str(Path(tmp_path, "test_log.txt")), ] + test_args return out @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['-l', '6l7778'], - ['-s', 'TrollXChromosomes/', '-L', 1], - ['-l', 'eiajjw'], - ['-l', 'xl0lhi'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + "test_args", + ( + ["-l", "6l7778"], + ["-s", "TrollXChromosomes/", "-L", 1], + ["-l", "eiajjw"], + ["-l", "xl0lhi"], + ), +) def test_cli_scrape_general(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_cloner_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'Downloaded submission' in result.output - assert 'Record for entry item' in result.output + assert "Downloaded submission" in result.output + assert "Record for entry item" in result.output @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['-l', 'ijy4ch'], # user deleted post - ['-l', 'kw4wjm'], # post from banned subreddit -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + "test_args", + ( + ["-l", "ijy4ch"], # user deleted post + ["-l", "kw4wjm"], # post from banned subreddit + ), +) def test_cli_scrape_soft_fail(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = 
create_basic_args_for_cloner_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'Downloaded submission' not in result.output - assert 'Record for entry item' not in result.output + assert "Downloaded submission" not in result.output + assert "Record for entry item" not in result.output diff --git a/tests/integration_tests/test_download_integration.py b/tests/integration_tests/test_download_integration.py index 83f972d..2ab38a0 100644 --- a/tests/integration_tests/test_download_integration.py +++ b/tests/integration_tests/test_download_integration.py @@ -9,97 +9,107 @@ from click.testing import CliRunner from bdfr.__main__ import cli -does_test_config_exist = Path('./tests/test_config.cfg').exists() +does_test_config_exist = Path("./tests/test_config.cfg").exists() def copy_test_config(run_path: Path): - shutil.copy(Path('./tests/test_config.cfg'), Path(run_path, './test_config.cfg')) + shutil.copy(Path("./tests/test_config.cfg"), Path(run_path, "./test_config.cfg")) def create_basic_args_for_download_runner(test_args: list[str], run_path: Path): copy_test_config(run_path) out = [ - 'download', str(run_path), - '-v', - '--config', str(Path(run_path, './test_config.cfg')), - '--log', str(Path(run_path, 'test_log.txt')), + "download", + str(run_path), + "-v", + "--config", + str(Path(run_path, "./test_config.cfg")), + "--log", + str(Path(run_path, "test_log.txt")), ] + test_args return out @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['-s', 'Mindustry', '-L', 3], - ['-s', 'r/Mindustry', '-L', 3], - ['-s', 'r/mindustry', '-L', 3], - ['-s', 'mindustry', '-L', 3], - ['-s', 'https://www.reddit.com/r/TrollXChromosomes/', '-L', 3], - ['-s', 'r/TrollXChromosomes/', '-L', 3], - ['-s', 'TrollXChromosomes/', '-L', 3], - ['-s', 'trollxchromosomes', '-L', 3], - ['-s', 'trollxchromosomes,mindustry,python', '-L', 3], - ['-s', 'trollxchromosomes, mindustry, python', '-L', 3], - ['-s', 'trollxchromosomes', '-L', 3, '--time', 'day'], - ['-s', 'trollxchromosomes', '-L', 3, '--sort', 'new'], - ['-s', 'trollxchromosomes', '-L', 3, '--time', 'day', '--sort', 'new'], - ['-s', 'trollxchromosomes', '-L', 3, '--search', 'women'], - ['-s', 'trollxchromosomes', '-L', 3, '--time', 'day', '--search', 'women'], - ['-s', 'trollxchromosomes', '-L', 3, '--sort', 'new', '--search', 'women'], - ['-s', 'trollxchromosomes', '-L', 3, '--time', 'day', '--sort', 'new', '--search', 'women'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + "test_args", + ( + ["-s", "Mindustry", "-L", 3], + ["-s", "r/Mindustry", "-L", 3], + ["-s", "r/mindustry", "-L", 3], + ["-s", "mindustry", "-L", 3], + ["-s", "https://www.reddit.com/r/TrollXChromosomes/", "-L", 3], + ["-s", "r/TrollXChromosomes/", "-L", 3], + ["-s", "TrollXChromosomes/", "-L", 3], + ["-s", "trollxchromosomes", "-L", 3], + ["-s", "trollxchromosomes,mindustry,python", "-L", 3], + ["-s", "trollxchromosomes, mindustry, python", "-L", 3], + ["-s", "trollxchromosomes", "-L", 3, "--time", "day"], + ["-s", "trollxchromosomes", "-L", 3, "--sort", "new"], + ["-s", "trollxchromosomes", "-L", 3, "--time", "day", "--sort", "new"], + ["-s", "trollxchromosomes", "-L", 3, "--search", "women"], + ["-s", "trollxchromosomes", "-L", 3, "--time", "day", "--search", "women"], + ["-s", "trollxchromosomes", 
"-L", 3, "--sort", "new", "--search", "women"], + ["-s", "trollxchromosomes", "-L", 3, "--time", "day", "--sort", "new", "--search", "women"], + ), +) def test_cli_download_subreddits(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_download_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'Added submissions from subreddit ' in result.output - assert 'Downloaded submission' in result.output + assert "Added submissions from subreddit " in result.output + assert "Downloaded submission" in result.output @pytest.mark.online @pytest.mark.reddit @pytest.mark.slow @pytest.mark.authenticated -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['-s', 'hentai', '-L', 10, '--search', 'red', '--authenticate'], - ['--authenticate', '--subscribed', '-L', 10], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + "test_args", + ( + ["-s", "hentai", "-L", 10, "--search", "red", "--authenticate"], + ["--authenticate", "--subscribed", "-L", 10], + ), +) def test_cli_download_search_subreddits_authenticated(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_download_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'Added submissions from subreddit ' in result.output - assert 'Downloaded submission' in result.output + assert "Added submissions from subreddit " in result.output + assert "Downloaded submission" in result.output @pytest.mark.online @pytest.mark.reddit @pytest.mark.authenticated -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['--subreddit', 'friends', '-L', 10, '--authenticate'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize("test_args", (["--subreddit", "friends", "-L", 10, "--authenticate"],)) def test_cli_download_user_specific_subreddits(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_download_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'Added submissions from subreddit ' in result.output + assert "Added submissions from subreddit " in result.output @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['-l', '6l7778'], - ['-l', 'https://reddit.com/r/EmpireDidNothingWrong/comments/6l7778/technically_true/'], - ['-l', 'm3hxzd'], # Really long title used to overflow filename limit - ['-l', 'm5bqkf'], # Resource leading to a 404 -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + "test_args", + ( + ["-l", "6l7778"], + ["-l", "https://reddit.com/r/EmpireDidNothingWrong/comments/6l7778/technically_true/"], + ["-l", "m3hxzd"], # Really long title used to overflow filename limit + ["-l", "m5bqkf"], # Resource leading to a 404 + ), +) def test_cli_download_links(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = 
create_basic_args_for_download_runner(test_args, tmp_path) @@ -109,64 +119,66 @@ def test_cli_download_links(test_args: list[str], tmp_path: Path): @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['--user', 'helen_darten', '-m', 'cuteanimalpics', '-L', 10], - ['--user', 'helen_darten', '-m', 'cuteanimalpics', '-L', 10, '--sort', 'rising'], - ['--user', 'helen_darten', '-m', 'cuteanimalpics', '-L', 10, '--time', 'week'], - ['--user', 'helen_darten', '-m', 'cuteanimalpics', '-L', 10, '--time', 'week', '--sort', 'rising'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + "test_args", + ( + ["--user", "helen_darten", "-m", "cuteanimalpics", "-L", 10], + ["--user", "helen_darten", "-m", "cuteanimalpics", "-L", 10, "--sort", "rising"], + ["--user", "helen_darten", "-m", "cuteanimalpics", "-L", 10, "--time", "week"], + ["--user", "helen_darten", "-m", "cuteanimalpics", "-L", 10, "--time", "week", "--sort", "rising"], + ), +) def test_cli_download_multireddit(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_download_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'Added submissions from multireddit ' in result.output + assert "Added submissions from multireddit " in result.output @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['--user', 'helen_darten', '-m', 'xxyyzzqwerty', '-L', 10], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize("test_args", (["--user", "helen_darten", "-m", "xxyyzzqwerty", "-L", 10],)) def test_cli_download_multireddit_nonexistent(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_download_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'Failed to get submissions for multireddit' in result.output - assert 'received 404 HTTP response' in result.output + assert "Failed to get submissions for multireddit" in result.output + assert "received 404 HTTP response" in result.output @pytest.mark.online @pytest.mark.reddit @pytest.mark.authenticated -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['--user', 'djnish', '--submitted', '--user', 'FriesWithThat', '-L', 10], - ['--user', 'me', '--upvoted', '--authenticate', '-L', 10], - ['--user', 'me', '--saved', '--authenticate', '-L', 10], - ['--user', 'me', '--submitted', '--authenticate', '-L', 10], - ['--user', 'djnish', '--submitted', '-L', 10], - ['--user', 'djnish', '--submitted', '-L', 10, '--time', 'month'], - ['--user', 'djnish', '--submitted', '-L', 10, '--sort', 'controversial'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + "test_args", + ( + ["--user", "djnish", "--submitted", "--user", "FriesWithThat", "-L", 10], + ["--user", "me", "--upvoted", "--authenticate", "-L", 10], + ["--user", "me", "--saved", "--authenticate", "-L", 10], + ["--user", "me", 
"--submitted", "--authenticate", "-L", 10], + ["--user", "djnish", "--submitted", "-L", 10], + ["--user", "djnish", "--submitted", "-L", 10, "--time", "month"], + ["--user", "djnish", "--submitted", "-L", 10, "--sort", "controversial"], + ), +) def test_cli_download_user_data_good(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_download_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'Downloaded submission ' in result.output + assert "Downloaded submission " in result.output @pytest.mark.online @pytest.mark.reddit @pytest.mark.authenticated -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['--user', 'me', '-L', 10, '--folder-scheme', ''], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize("test_args", (["--user", "me", "-L", 10, "--folder-scheme", ""],)) def test_cli_download_user_data_bad_me_unauthenticated(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_download_runner(test_args, tmp_path) @@ -177,42 +189,41 @@ def test_cli_download_user_data_bad_me_unauthenticated(test_args: list[str], tmp @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['--subreddit', 'python', '-L', 1, '--search-existing'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize("test_args", (["--subreddit", "python", "-L", 1, "--search-existing"],)) def test_cli_download_search_existing(test_args: list[str], tmp_path: Path): - Path(tmp_path, 'test.txt').touch() + Path(tmp_path, "test.txt").touch() runner = CliRunner() test_args = create_basic_args_for_download_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'Calculating hashes for' in result.output + assert "Calculating hashes for" in result.output @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['--subreddit', 'tumblr', '-L', '25', '--skip', 'png', '--skip', 'jpg'], - ['--subreddit', 'MaliciousCompliance', '-L', '25', '--skip', 'txt'], - ['--subreddit', 'tumblr', '-L', '10', '--skip-domain', 'i.redd.it'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + "test_args", + ( + ["--subreddit", "tumblr", "-L", "25", "--skip", "png", "--skip", "jpg"], + ["--subreddit", "MaliciousCompliance", "-L", "25", "--skip", "txt"], + ["--subreddit", "tumblr", "-L", "10", "--skip-domain", "i.redd.it"], + ), +) def test_cli_download_download_filters(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_download_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert any((string in result.output for string in ('Download filter removed ', 'filtered due to URL'))) + assert any((string in result.output for string in ("Download filter removed ", "filtered due to URL"))) @pytest.mark.online @pytest.mark.reddit @pytest.mark.slow 
-@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['--subreddit', 'all', '-L', '100', '--sort', 'new'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize("test_args", (["--subreddit", "all", "-L", "100", "--sort", "new"],)) def test_cli_download_long(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_download_runner(test_args, tmp_path) @@ -223,34 +234,40 @@ def test_cli_download_long(test_args: list[str], tmp_path: Path): @pytest.mark.online @pytest.mark.reddit @pytest.mark.slow -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['--user', 'sdclhgsolgjeroij', '--submitted', '-L', 10], - ['--user', 'me', '--upvoted', '-L', 10], - ['--user', 'sdclhgsolgjeroij', '--upvoted', '-L', 10], - ['--subreddit', 'submitters', '-L', 10], # Private subreddit - ['--subreddit', 'donaldtrump', '-L', 10], # Banned subreddit - ['--user', 'djnish', '--user', 'helen_darten', '-m', 'cuteanimalpics', '-L', 10], - ['--subreddit', 'friends', '-L', 10], - ['-l', 'ijy4ch'], # user deleted post - ['-l', 'kw4wjm'], # post from banned subreddit -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + "test_args", + ( + ["--user", "sdclhgsolgjeroij", "--submitted", "-L", 10], + ["--user", "me", "--upvoted", "-L", 10], + ["--user", "sdclhgsolgjeroij", "--upvoted", "-L", 10], + ["--subreddit", "submitters", "-L", 10], # Private subreddit + ["--subreddit", "donaldtrump", "-L", 10], # Banned subreddit + ["--user", "djnish", "--user", "helen_darten", "-m", "cuteanimalpics", "-L", 10], + ["--subreddit", "friends", "-L", 10], + ["-l", "ijy4ch"], # user deleted post + ["-l", "kw4wjm"], # post from banned subreddit + ), +) def test_cli_download_soft_fail(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_download_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'Downloaded' not in result.output + assert "Downloaded" not in result.output @pytest.mark.online @pytest.mark.reddit @pytest.mark.slow -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['--time', 'random'], - ['--sort', 'random'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + "test_args", + ( + ["--time", "random"], + ["--sort", "random"], + ), +) def test_cli_download_hard_fail(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_download_runner(test_args, tmp_path) @@ -260,114 +277,122 @@ def test_cli_download_hard_fail(test_args: list[str], tmp_path: Path): def test_cli_download_use_default_config(tmp_path: Path): runner = CliRunner() - test_args = ['download', '-vv', str(tmp_path)] + test_args = ["download", "-vv", str(tmp_path)] result = runner.invoke(cli, test_args) assert result.exit_code == 0 @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['-l', '6l7778', 
'--exclude-id', '6l7778'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize("test_args", (["-l", "6l7778", "--exclude-id", "6l7778"],)) def test_cli_download_links_exclusion(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_download_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'in exclusion list' in result.output - assert 'Downloaded submission ' not in result.output + assert "in exclusion list" in result.output + assert "Downloaded submission " not in result.output @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['-l', '6l7778', '--skip-subreddit', 'EmpireDidNothingWrong'], - ['-s', 'trollxchromosomes', '--skip-subreddit', 'trollxchromosomes', '-L', '3'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + "test_args", + ( + ["-l", "6l7778", "--skip-subreddit", "EmpireDidNothingWrong"], + ["-s", "trollxchromosomes", "--skip-subreddit", "trollxchromosomes", "-L", "3"], + ), +) def test_cli_download_subreddit_exclusion(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_download_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'in skip list' in result.output - assert 'Downloaded submission ' not in result.output + assert "in skip list" in result.output + assert "Downloaded submission " not in result.output @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['--file-scheme', '{TITLE}'], - ['--file-scheme', '{TITLE}_test_{SUBREDDIT}'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + "test_args", + ( + ["--file-scheme", "{TITLE}"], + ["--file-scheme", "{TITLE}_test_{SUBREDDIT}"], + ), +) def test_cli_download_file_scheme_warning(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_download_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'Some files might not be downloaded due to name conflicts' in result.output + assert "Some files might not be downloaded due to name conflicts" in result.output @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['-l', 'n9w9fo', '--disable-module', 'SelfPost'], - ['-l', 'nnb9vs', '--disable-module', 'VReddit'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + "test_args", + ( + ["-l", "n9w9fo", "--disable-module", "SelfPost"], + ["-l", "nnb9vs", "--disable-module", "VReddit"], + ), +) def test_cli_download_disable_modules(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_download_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'skipped due to disabled module' in 
result.output - assert 'Downloaded submission' not in result.output + assert "skipped due to disabled module" in result.output + assert "Downloaded submission" not in result.output @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") def test_cli_download_include_id_file(tmp_path: Path): - test_file = Path(tmp_path, 'include.txt') - test_args = ['--include-id-file', str(test_file)] - test_file.write_text('odr9wg\nody576') + test_file = Path(tmp_path, "include.txt") + test_args = ["--include-id-file", str(test_file)] + test_file.write_text("odr9wg\nody576") runner = CliRunner() test_args = create_basic_args_for_download_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'Downloaded submission' in result.output + assert "Downloaded submission" in result.output @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize('test_args', ( - ['--ignore-user', 'ArjanEgges', '-l', 'm3hxzd'], -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize("test_args", (["--ignore-user", "ArjanEgges", "-l", "m3hxzd"],)) def test_cli_download_ignore_user(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_download_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert 'Downloaded submission' not in result.output - assert 'being an ignored user' in result.output + assert "Downloaded submission" not in result.output + assert "being an ignored user" in result.output @pytest.mark.online @pytest.mark.reddit -@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') -@pytest.mark.parametrize(('test_args', 'was_filtered'), ( - (['-l', 'ljyy27', '--min-score', '50'], True), - (['-l', 'ljyy27', '--min-score', '1'], False), - (['-l', 'ljyy27', '--max-score', '1'], True), - (['-l', 'ljyy27', '--max-score', '100'], False), -)) +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + ("test_args", "was_filtered"), + ( + (["-l", "ljyy27", "--min-score", "50"], True), + (["-l", "ljyy27", "--min-score", "1"], False), + (["-l", "ljyy27", "--max-score", "1"], True), + (["-l", "ljyy27", "--max-score", "100"], False), + ), +) def test_cli_download_score_filter(test_args: list[str], was_filtered: bool, tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_download_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) assert result.exit_code == 0 - assert ('filtered due to score' in result.output) == was_filtered + assert ("filtered due to score" in result.output) == was_filtered diff --git a/tests/site_downloaders/fallback_downloaders/test_ytdlp_fallback.py b/tests/site_downloaders/fallback_downloaders/test_ytdlp_fallback.py index 29e72c5..9823d08 100644 --- a/tests/site_downloaders/fallback_downloaders/test_ytdlp_fallback.py +++ b/tests/site_downloaders/fallback_downloaders/test_ytdlp_fallback.py @@ -10,22 +10,23 @@ from bdfr.site_downloaders.fallback_downloaders.ytdlp_fallback import YtdlpFallb @pytest.mark.online 
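# (annotation) can_handle_link defers to yt-dlp's metadata extraction, so True
# is expected only for URLs yt-dlp can actually resolve; the negative cases
# are URLs it cannot extract video info from. Being network-backed, these
# expectations can drift as the remote content changes.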
-@pytest.mark.parametrize(('test_url', 'expected'), ( - ('https://www.reddit.com/r/specializedtools/comments/n2nw5m/bamboo_splitter/', True), - ('https://www.youtube.com/watch?v=P19nvJOmqCc', True), - ('https://www.example.com/test', False), - ('https://milesmatrix.bandcamp.com/album/la-boum/', False), - ('https://v.redd.it/dlr54z8p182a1', True), -)) +@pytest.mark.parametrize( + ("test_url", "expected"), + ( + ("https://www.reddit.com/r/specializedtools/comments/n2nw5m/bamboo_splitter/", True), + ("https://www.youtube.com/watch?v=P19nvJOmqCc", True), + ("https://www.example.com/test", False), + ("https://milesmatrix.bandcamp.com/album/la-boum/", False), + ("https://v.redd.it/dlr54z8p182a1", True), + ), +) def test_can_handle_link(test_url: str, expected: bool): result = YtdlpFallback.can_handle_link(test_url) assert result == expected @pytest.mark.online -@pytest.mark.parametrize('test_url', ( - 'https://milesmatrix.bandcamp.com/album/la-boum/', -)) +@pytest.mark.parametrize("test_url", ("https://milesmatrix.bandcamp.com/album/la-boum/",)) def test_info_extraction_bad(test_url: str): with pytest.raises(NotADownloadableLinkError): YtdlpFallback.get_video_attributes(test_url) @@ -33,12 +34,18 @@ def test_info_extraction_bad(test_url: str): @pytest.mark.online @pytest.mark.slow -@pytest.mark.parametrize(('test_url', 'expected_hash'), ( - ('https://streamable.com/dt46y', 'b7e465adaade5f2b6d8c2b4b7d0a2878'), - ('https://streamable.com/t8sem', '49b2d1220c485455548f1edbc05d4ecf'), - ('https://www.reddit.com/r/specializedtools/comments/n2nw5m/bamboo_splitter/', '6c6ff46e04b4e33a755ae2a9b5a45ac5'), - ('https://v.redd.it/9z1dnk3xr5k61', '226cee353421c7aefb05c92424cc8cdd'), -)) +@pytest.mark.parametrize( + ("test_url", "expected_hash"), + ( + ("https://streamable.com/dt46y", "b7e465adaade5f2b6d8c2b4b7d0a2878"), + ("https://streamable.com/t8sem", "49b2d1220c485455548f1edbc05d4ecf"), + ( + "https://www.reddit.com/r/specializedtools/comments/n2nw5m/bamboo_splitter/", + "6c6ff46e04b4e33a755ae2a9b5a45ac5", + ), + ("https://v.redd.it/9z1dnk3xr5k61", "226cee353421c7aefb05c92424cc8cdd"), + ), +) def test_find_resources(test_url: str, expected_hash: str): test_submission = MagicMock() test_submission.url = test_url diff --git a/tests/site_downloaders/test_delay_for_reddit.py b/tests/site_downloaders/test_delay_for_reddit.py index 5e0e1c8..65d080c 100644 --- a/tests/site_downloaders/test_delay_for_reddit.py +++ b/tests/site_downloaders/test_delay_for_reddit.py @@ -10,10 +10,13 @@ from bdfr.site_downloaders.delay_for_reddit import DelayForReddit @pytest.mark.online -@pytest.mark.parametrize(('test_url', 'expected_hash'), ( - ('https://www.delayforreddit.com/dfr/calvin6123/MjU1Njc5NQ==', '3300f28c2f9358d05667985c9c04210d'), - ('https://www.delayforreddit.com/dfr/RoXs_26/NDAwMzAyOQ==', '09b7b01719dff45ab197bdc08b90f78a'), -)) +@pytest.mark.parametrize( + ("test_url", "expected_hash"), + ( + ("https://www.delayforreddit.com/dfr/calvin6123/MjU1Njc5NQ==", "3300f28c2f9358d05667985c9c04210d"), + ("https://www.delayforreddit.com/dfr/RoXs_26/NDAwMzAyOQ==", "09b7b01719dff45ab197bdc08b90f78a"), + ), +) def test_download_resource(test_url: str, expected_hash: str): mock_submission = Mock() mock_submission.url = test_url diff --git a/tests/site_downloaders/test_direct.py b/tests/site_downloaders/test_direct.py index 56f90fc..b652d9a 100644 --- a/tests/site_downloaders/test_direct.py +++ b/tests/site_downloaders/test_direct.py @@ -10,10 +10,13 @@ from bdfr.site_downloaders.direct import Direct @pytest.mark.online 
-@pytest.mark.parametrize(('test_url', 'expected_hash'), ( - ('https://giant.gfycat.com/DefinitiveCanineCrayfish.mp4', '48f9bd4dbec1556d7838885612b13b39'), - ('https://giant.gfycat.com/DazzlingSilkyIguana.mp4', '808941b48fc1e28713d36dd7ed9dc648'), -)) +@pytest.mark.parametrize( + ("test_url", "expected_hash"), + ( + ("https://giant.gfycat.com/DefinitiveCanineCrayfish.mp4", "48f9bd4dbec1556d7838885612b13b39"), + ("https://giant.gfycat.com/DazzlingSilkyIguana.mp4", "808941b48fc1e28713d36dd7ed9dc648"), + ), +) def test_download_resource(test_url: str, expected_hash: str): mock_submission = Mock() mock_submission.url = test_url diff --git a/tests/site_downloaders/test_download_factory.py b/tests/site_downloaders/test_download_factory.py index bcfc704..581656d 100644 --- a/tests/site_downloaders/test_download_factory.py +++ b/tests/site_downloaders/test_download_factory.py @@ -21,67 +21,82 @@ from bdfr.site_downloaders.youtube import Youtube @pytest.mark.online -@pytest.mark.parametrize(('test_submission_url', 'expected_class'), ( - ('https://www.reddit.com/r/TwoXChromosomes/comments/lu29zn/i_refuse_to_live_my_life' - '_in_anything_but_comfort/', SelfPost), - ('https://i.redd.it/affyv0axd5k61.png', Direct), - ('https://i.imgur.com/bZx1SJQ.jpg', Imgur), - ('https://imgur.com/BuzvZwb.gifv', Imgur), - ('https://imgur.com/a/MkxAzeg', Imgur), - ('https://m.imgur.com/a/py3RW0j', Imgur), - ('https://www.reddit.com/gallery/lu93m7', Gallery), - ('https://gfycat.com/concretecheerfulfinwhale', Gfycat), - ('https://www.erome.com/a/NWGw0F09', Erome), - ('https://youtube.com/watch?v=Gv8Wz74FjVA', Youtube), - ('https://redgifs.com/watch/courageousimpeccablecanvasback', Redgifs), - ('https://www.gifdeliverynetwork.com/repulsivefinishedandalusianhorse', Redgifs), - ('https://youtu.be/DevfjHOhuFc', Youtube), - ('https://m.youtube.com/watch?v=kr-FeojxzUM', Youtube), - ('https://dynasty-scans.com/system/images_images/000/017/819/original/80215103_p0.png?1612232781', Direct), - ('https://v.redd.it/9z1dnk3xr5k61', VReddit), - ('https://streamable.com/dt46y', YtdlpFallback), - ('https://vimeo.com/channels/31259/53576664', YtdlpFallback), - ('http://video.pbs.org/viralplayer/2365173446/', YtdlpFallback), - ('https://www.pornhub.com/view_video.php?viewkey=ph5a2ee0461a8d0', PornHub), - ('https://www.patreon.com/posts/minecart-track-59346560', Gallery), -)) +@pytest.mark.parametrize( + ("test_submission_url", "expected_class"), + ( + ( + "https://www.reddit.com/r/TwoXChromosomes/comments/lu29zn/i_refuse_to_live_my_life" + "_in_anything_but_comfort/", + SelfPost, + ), + ("https://i.redd.it/affyv0axd5k61.png", Direct), + ("https://i.imgur.com/bZx1SJQ.jpg", Imgur), + ("https://imgur.com/BuzvZwb.gifv", Imgur), + ("https://imgur.com/a/MkxAzeg", Imgur), + ("https://m.imgur.com/a/py3RW0j", Imgur), + ("https://www.reddit.com/gallery/lu93m7", Gallery), + ("https://gfycat.com/concretecheerfulfinwhale", Gfycat), + ("https://www.erome.com/a/NWGw0F09", Erome), + ("https://youtube.com/watch?v=Gv8Wz74FjVA", Youtube), + ("https://redgifs.com/watch/courageousimpeccablecanvasback", Redgifs), + ("https://www.gifdeliverynetwork.com/repulsivefinishedandalusianhorse", Redgifs), + ("https://youtu.be/DevfjHOhuFc", Youtube), + ("https://m.youtube.com/watch?v=kr-FeojxzUM", Youtube), + ("https://dynasty-scans.com/system/images_images/000/017/819/original/80215103_p0.png?1612232781", Direct), + ("https://v.redd.it/9z1dnk3xr5k61", VReddit), + ("https://streamable.com/dt46y", YtdlpFallback), + ("https://vimeo.com/channels/31259/53576664", 
YtdlpFallback), + ("http://video.pbs.org/viralplayer/2365173446/", YtdlpFallback), + ("https://www.pornhub.com/view_video.php?viewkey=ph5a2ee0461a8d0", PornHub), + ("https://www.patreon.com/posts/minecart-track-59346560", Gallery), + ), +) def test_factory_lever_good(test_submission_url: str, expected_class: BaseDownloader, reddit_instance: praw.Reddit): result = DownloadFactory.pull_lever(test_submission_url) assert result is expected_class -@pytest.mark.parametrize('test_url', ( - 'random.com', - 'bad', - 'https://www.google.com/', - 'https://www.google.com', - 'https://www.google.com/test', - 'https://www.google.com/test/', -)) +@pytest.mark.parametrize( + "test_url", + ( + "random.com", + "bad", + "https://www.google.com/", + "https://www.google.com", + "https://www.google.com/test", + "https://www.google.com/test/", + ), +) def test_factory_lever_bad(test_url: str): with pytest.raises(NotADownloadableLinkError): DownloadFactory.pull_lever(test_url) -@pytest.mark.parametrize(('test_url', 'expected'), ( - ('www.test.com/test.png', 'test.com/test.png'), - ('www.test.com/test.png?test_value=random', 'test.com/test.png'), - ('https://youtube.com/watch?v=Gv8Wz74FjVA', 'youtube.com/watch'), - ('https://i.imgur.com/BuzvZwb.gifv', 'i.imgur.com/BuzvZwb.gifv'), -)) +@pytest.mark.parametrize( + ("test_url", "expected"), + ( + ("www.test.com/test.png", "test.com/test.png"), + ("www.test.com/test.png?test_value=random", "test.com/test.png"), + ("https://youtube.com/watch?v=Gv8Wz74FjVA", "youtube.com/watch"), + ("https://i.imgur.com/BuzvZwb.gifv", "i.imgur.com/BuzvZwb.gifv"), + ), +) def test_sanitise_url(test_url: str, expected: str): result = DownloadFactory.sanitise_url(test_url) assert result == expected -@pytest.mark.parametrize(('test_url', 'expected'), ( - ('www.example.com/test.asp', True), - ('www.example.com/test.html', True), - ('www.example.com/test.js', True), - ('www.example.com/test.xhtml', True), - ('www.example.com/test.mp4', False), - ('www.example.com/test.png', False), -)) +@pytest.mark.parametrize( + ("test_url", "expected"), + ( + ("www.example.com/test.asp", True), + ("www.example.com/test.html", True), + ("www.example.com/test.js", True), + ("www.example.com/test.xhtml", True), + ("www.example.com/test.mp4", False), + ("www.example.com/test.png", False), + ), +) def test_is_web_resource(test_url: str, expected: bool): result = DownloadFactory.is_web_resource(test_url) assert result == expected diff --git a/tests/site_downloaders/test_erome.py b/tests/site_downloaders/test_erome.py index 2f3701d..1baeb66 100644 --- a/tests/site_downloaders/test_erome.py +++ b/tests/site_downloaders/test_erome.py @@ -9,31 +9,38 @@ from bdfr.site_downloaders.erome import Erome @pytest.mark.online -@pytest.mark.parametrize(('test_url', 'expected_urls'), ( - ('https://www.erome.com/a/vqtPuLXh', ( - r'https://[a-z]\d+.erome.com/\d{3}/vqtPuLXh/KH2qBT99_480p.mp4', - )), - ('https://www.erome.com/a/ORhX0FZz', ( - r'https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/9IYQocM9_480p.mp4', - r'https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/9eEDc8xm_480p.mp4', - r'https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/EvApC7Rp_480p.mp4', - r'https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/LruobtMs_480p.mp4', - r'https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/TJNmSUU5_480p.mp4', - r'https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/X11Skh6Z_480p.mp4', - r'https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/bjlTkpn7_480p.mp4' - )), -)) +@pytest.mark.parametrize( + ("test_url", "expected_urls"), + ( + ("https://www.erome.com/a/vqtPuLXh", 
(r"https://[a-z]\d+.erome.com/\d{3}/vqtPuLXh/KH2qBT99_480p.mp4",)), + ( + "https://www.erome.com/a/ORhX0FZz", + ( + r"https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/9IYQocM9_480p.mp4", + r"https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/9eEDc8xm_480p.mp4", + r"https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/EvApC7Rp_480p.mp4", + r"https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/LruobtMs_480p.mp4", + r"https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/TJNmSUU5_480p.mp4", + r"https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/X11Skh6Z_480p.mp4", + r"https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/bjlTkpn7_480p.mp4", + ), + ), + ), +) def test_get_link(test_url: str, expected_urls: tuple[str]): - result = Erome. _get_links(test_url) + result = Erome._get_links(test_url) assert all([any([re.match(p, r) for r in result]) for p in expected_urls]) @pytest.mark.online @pytest.mark.slow -@pytest.mark.parametrize(('test_url', 'expected_hashes_len'), ( - ('https://www.erome.com/a/vqtPuLXh', 1), - ('https://www.erome.com/a/4tP3KI6F', 1), -)) +@pytest.mark.parametrize( + ("test_url", "expected_hashes_len"), + ( + ("https://www.erome.com/a/vqtPuLXh", 1), + ("https://www.erome.com/a/4tP3KI6F", 1), + ), +) def test_download_resource(test_url: str, expected_hashes_len: int): # Can't compare hashes for this test, Erome doesn't return the exact same file from request to request so the hash # will change back and forth randomly diff --git a/tests/site_downloaders/test_gallery.py b/tests/site_downloaders/test_gallery.py index e9c401f..57d055b 100644 --- a/tests/site_downloaders/test_gallery.py +++ b/tests/site_downloaders/test_gallery.py @@ -9,30 +9,39 @@ from bdfr.site_downloaders.gallery import Gallery @pytest.mark.online -@pytest.mark.parametrize(('test_ids', 'expected'), ( - ([ - {'media_id': '18nzv9ch0hn61'}, - {'media_id': 'jqkizcch0hn61'}, - {'media_id': 'k0fnqzbh0hn61'}, - {'media_id': 'm3gamzbh0hn61'}, - ], { - 'https://i.redd.it/18nzv9ch0hn61.jpg', - 'https://i.redd.it/jqkizcch0hn61.jpg', - 'https://i.redd.it/k0fnqzbh0hn61.jpg', - 'https://i.redd.it/m3gamzbh0hn61.jpg' - }), - ([ - {'media_id': '04vxj25uqih61'}, - {'media_id': '0fnx83kpqih61'}, - {'media_id': '7zkmr1wqqih61'}, - {'media_id': 'u37k5gxrqih61'}, - ], { - 'https://i.redd.it/04vxj25uqih61.png', - 'https://i.redd.it/0fnx83kpqih61.png', - 'https://i.redd.it/7zkmr1wqqih61.png', - 'https://i.redd.it/u37k5gxrqih61.png' - }), -)) +@pytest.mark.parametrize( + ("test_ids", "expected"), + ( + ( + [ + {"media_id": "18nzv9ch0hn61"}, + {"media_id": "jqkizcch0hn61"}, + {"media_id": "k0fnqzbh0hn61"}, + {"media_id": "m3gamzbh0hn61"}, + ], + { + "https://i.redd.it/18nzv9ch0hn61.jpg", + "https://i.redd.it/jqkizcch0hn61.jpg", + "https://i.redd.it/k0fnqzbh0hn61.jpg", + "https://i.redd.it/m3gamzbh0hn61.jpg", + }, + ), + ( + [ + {"media_id": "04vxj25uqih61"}, + {"media_id": "0fnx83kpqih61"}, + {"media_id": "7zkmr1wqqih61"}, + {"media_id": "u37k5gxrqih61"}, + ], + { + "https://i.redd.it/04vxj25uqih61.png", + "https://i.redd.it/0fnx83kpqih61.png", + "https://i.redd.it/7zkmr1wqqih61.png", + "https://i.redd.it/u37k5gxrqih61.png", + }, + ), + ), +) def test_gallery_get_links(test_ids: list[dict], expected: set[str]): results = Gallery._get_links(test_ids) assert set(results) == expected @@ -40,32 +49,47 @@ def test_gallery_get_links(test_ids: list[dict], expected: set[str]): @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_submission_id', 'expected_hashes'), ( - ('m6lvrh', { - '5c42b8341dd56eebef792e86f3981c6a', - '8f38d76da46f4057bf2773a778e725ca', - 'f5776f8f90491c8b770b8e0a6bfa49b3', 
- 'fa1a43c94da30026ad19a9813a0ed2c2', - }), - ('ljyy27', { - '359c203ec81d0bc00e675f1023673238', - '79262fd46bce5bfa550d878a3b898be4', - '808c35267f44acb523ce03bfa5687404', - 'ec8b65bdb7f1279c4b3af0ea2bbb30c3', - }), - ('obkflw', { - '65163f685fb28c5b776e0e77122718be', - '2a337eb5b13c34d3ca3f51b5db7c13e9', - }), - ('rb3ub6', { # patreon post - '748a976c6cedf7ea85b6f90e7cb685c7', - '839796d7745e88ced6355504e1f74508', - 'bcdb740367d0f19f97a77e614b48a42d', - '0f230b8c4e5d103d35a773fab9814ec3', - 'e5192d6cb4f84c4f4a658355310bf0f9', - '91cbe172cd8ccbcf049fcea4204eb979', - }) -)) +@pytest.mark.parametrize( + ("test_submission_id", "expected_hashes"), + ( + ( + "m6lvrh", + { + "5c42b8341dd56eebef792e86f3981c6a", + "8f38d76da46f4057bf2773a778e725ca", + "f5776f8f90491c8b770b8e0a6bfa49b3", + "fa1a43c94da30026ad19a9813a0ed2c2", + }, + ), + ( + "ljyy27", + { + "359c203ec81d0bc00e675f1023673238", + "79262fd46bce5bfa550d878a3b898be4", + "808c35267f44acb523ce03bfa5687404", + "ec8b65bdb7f1279c4b3af0ea2bbb30c3", + }, + ), + ( + "obkflw", + { + "65163f685fb28c5b776e0e77122718be", + "2a337eb5b13c34d3ca3f51b5db7c13e9", + }, + ), + ( + "rb3ub6", + { # patreon post + "748a976c6cedf7ea85b6f90e7cb685c7", + "839796d7745e88ced6355504e1f74508", + "bcdb740367d0f19f97a77e614b48a42d", + "0f230b8c4e5d103d35a773fab9814ec3", + "e5192d6cb4f84c4f4a658355310bf0f9", + "91cbe172cd8ccbcf049fcea4204eb979", + }, + ), + ), +) def test_gallery_download(test_submission_id: str, expected_hashes: set[str], reddit_instance: praw.Reddit): test_submission = reddit_instance.submission(id=test_submission_id) gallery = Gallery(test_submission) @@ -75,10 +99,13 @@ def test_gallery_download(test_submission_id: str, expected_hashes: set[str], re assert set(hashes) == expected_hashes -@pytest.mark.parametrize('test_id', ( - 'n0pyzp', - 'nxyahw', -)) +@pytest.mark.parametrize( + "test_id", + ( + "n0pyzp", + "nxyahw", + ), +) def test_gallery_download_raises_right_error(test_id: str, reddit_instance: praw.Reddit): test_submission = reddit_instance.submission(id=test_id) gallery = Gallery(test_submission) diff --git a/tests/site_downloaders/test_gfycat.py b/tests/site_downloaders/test_gfycat.py index 3b40840..d436636 100644 --- a/tests/site_downloaders/test_gfycat.py +++ b/tests/site_downloaders/test_gfycat.py @@ -10,20 +10,26 @@ from bdfr.site_downloaders.gfycat import Gfycat @pytest.mark.online -@pytest.mark.parametrize(('test_url', 'expected_url'), ( - ('https://gfycat.com/definitivecaninecrayfish', 'https://giant.gfycat.com/DefinitiveCanineCrayfish.mp4'), - ('https://gfycat.com/dazzlingsilkyiguana', 'https://giant.gfycat.com/DazzlingSilkyIguana.mp4'), -)) +@pytest.mark.parametrize( + ("test_url", "expected_url"), + ( + ("https://gfycat.com/definitivecaninecrayfish", "https://giant.gfycat.com/DefinitiveCanineCrayfish.mp4"), + ("https://gfycat.com/dazzlingsilkyiguana", "https://giant.gfycat.com/DazzlingSilkyIguana.mp4"), + ), +) def test_get_link(test_url: str, expected_url: str): result = Gfycat._get_link(test_url) assert result.pop() == expected_url @pytest.mark.online -@pytest.mark.parametrize(('test_url', 'expected_hash'), ( - ('https://gfycat.com/definitivecaninecrayfish', '48f9bd4dbec1556d7838885612b13b39'), - ('https://gfycat.com/dazzlingsilkyiguana', '808941b48fc1e28713d36dd7ed9dc648'), -)) +@pytest.mark.parametrize( + ("test_url", "expected_hash"), + ( + ("https://gfycat.com/definitivecaninecrayfish", "48f9bd4dbec1556d7838885612b13b39"), + ("https://gfycat.com/dazzlingsilkyiguana", "808941b48fc1e28713d36dd7ed9dc648"), + ), +) def 
test_download_resource(test_url: str, expected_hash: str): mock_submission = Mock() mock_submission.url = test_url diff --git a/tests/site_downloaders/test_imgur.py b/tests/site_downloaders/test_imgur.py index 00419ba..38dbdc5 100644 --- a/tests/site_downloaders/test_imgur.py +++ b/tests/site_downloaders/test_imgur.py @@ -11,166 +11,167 @@ from bdfr.site_downloaders.imgur import Imgur @pytest.mark.online -@pytest.mark.parametrize(('test_url', 'expected_gen_dict', 'expected_image_dict'), ( +@pytest.mark.parametrize( + ("test_url", "expected_gen_dict", "expected_image_dict"), ( - 'https://imgur.com/a/xWZsDDP', - {'num_images': '1', 'id': 'xWZsDDP', 'hash': 'xWZsDDP'}, - [ - {'hash': 'ypa8YfS', 'title': '', 'ext': '.png', 'animated': False} - ] + ( + "https://imgur.com/a/xWZsDDP", + {"num_images": "1", "id": "xWZsDDP", "hash": "xWZsDDP"}, + [{"hash": "ypa8YfS", "title": "", "ext": ".png", "animated": False}], + ), + ( + "https://imgur.com/gallery/IjJJdlC", + {"num_images": 1, "id": 384898055, "hash": "IjJJdlC"}, + [ + { + "hash": "CbbScDt", + "description": "watch when he gets it", + "ext": ".gif", + "animated": True, + "has_sound": False, + } + ], + ), + ( + "https://imgur.com/a/dcc84Gt", + {"num_images": "4", "id": "dcc84Gt", "hash": "dcc84Gt"}, + [ + {"hash": "ylx0Kle", "ext": ".jpg", "title": ""}, + {"hash": "TdYfKbK", "ext": ".jpg", "title": ""}, + {"hash": "pCxGbe8", "ext": ".jpg", "title": ""}, + {"hash": "TSAkikk", "ext": ".jpg", "title": ""}, + ], + ), + ( + "https://m.imgur.com/a/py3RW0j", + { + "num_images": "1", + "id": "py3RW0j", + "hash": "py3RW0j", + }, + [{"hash": "K24eQmK", "has_sound": False, "ext": ".jpg"}], + ), ), - ( - 'https://imgur.com/gallery/IjJJdlC', - {'num_images': 1, 'id': 384898055, 'hash': 'IjJJdlC'}, - [ - {'hash': 'CbbScDt', - 'description': 'watch when he gets it', - 'ext': '.gif', - 'animated': True, - 'has_sound': False - } - ], - ), - ( - 'https://imgur.com/a/dcc84Gt', - {'num_images': '4', 'id': 'dcc84Gt', 'hash': 'dcc84Gt'}, - [ - {'hash': 'ylx0Kle', 'ext': '.jpg', 'title': ''}, - {'hash': 'TdYfKbK', 'ext': '.jpg', 'title': ''}, - {'hash': 'pCxGbe8', 'ext': '.jpg', 'title': ''}, - {'hash': 'TSAkikk', 'ext': '.jpg', 'title': ''}, - ] - ), - ( - 'https://m.imgur.com/a/py3RW0j', - {'num_images': '1', 'id': 'py3RW0j', 'hash': 'py3RW0j', }, - [ - {'hash': 'K24eQmK', 'has_sound': False, 'ext': '.jpg'} - ], - ), -)) +) def test_get_data_album(test_url: str, expected_gen_dict: dict, expected_image_dict: list[dict]): result = Imgur._get_data(test_url) assert all([result.get(key) == expected_gen_dict[key] for key in expected_gen_dict.keys()]) # Check if all the keys from the test dict are correct in at least one of the album entries - assert any([all([image.get(key) == image_dict[key] for key in image_dict.keys()]) - for image_dict in expected_image_dict for image in result['album_images']['images']]) + assert any( + [ + all([image.get(key) == image_dict[key] for key in image_dict.keys()]) + for image_dict in expected_image_dict + for image in result["album_images"]["images"] + ] + ) @pytest.mark.online -@pytest.mark.parametrize(('test_url', 'expected_image_dict'), ( +@pytest.mark.parametrize( + ("test_url", "expected_image_dict"), ( - 'https://i.imgur.com/dLk3FGY.gifv', - {'hash': 'dLk3FGY', 'title': '', 'ext': '.mp4', 'animated': True} + ("https://i.imgur.com/dLk3FGY.gifv", {"hash": "dLk3FGY", "title": "", "ext": ".mp4", "animated": True}), + ( + "https://imgur.com/65FqTpT.gifv", + {"hash": "65FqTpT", "title": "", "description": "", "animated": True, 
"mimetype": "video/mp4"}, + ), ), - ( - 'https://imgur.com/65FqTpT.gifv', - { - 'hash': '65FqTpT', - 'title': '', - 'description': '', - 'animated': True, - 'mimetype': 'video/mp4' - }, - ), -)) +) def test_get_data_gif(test_url: str, expected_image_dict: dict): result = Imgur._get_data(test_url) assert all([result.get(key) == expected_image_dict[key] for key in expected_image_dict.keys()]) -@pytest.mark.parametrize('test_extension', ( - '.gif', - '.png', - '.jpg', - '.mp4' -)) +@pytest.mark.parametrize("test_extension", (".gif", ".png", ".jpg", ".mp4")) def test_imgur_extension_validation_good(test_extension: str): result = Imgur._validate_extension(test_extension) assert result == test_extension -@pytest.mark.parametrize('test_extension', ( - '.jpeg', - 'bad', - '.avi', - '.test', - '.flac', -)) +@pytest.mark.parametrize( + "test_extension", + ( + ".jpeg", + "bad", + ".avi", + ".test", + ".flac", + ), +) def test_imgur_extension_validation_bad(test_extension: str): with pytest.raises(SiteDownloaderError): Imgur._validate_extension(test_extension) @pytest.mark.online -@pytest.mark.parametrize(('test_url', 'expected_hashes'), ( +@pytest.mark.parametrize( + ("test_url", "expected_hashes"), ( - 'https://imgur.com/a/xWZsDDP', - ('f551d6e6b0fef2ce909767338612e31b',) - ), - ( - 'https://imgur.com/gallery/IjJJdlC', - ('740b006cf9ec9d6f734b6e8f5130bdab',), - ), - ( - 'https://imgur.com/a/dcc84Gt', + ("https://imgur.com/a/xWZsDDP", ("f551d6e6b0fef2ce909767338612e31b",)), ( - 'cf1158e1de5c3c8993461383b96610cf', - '28d6b791a2daef8aa363bf5a3198535d', - '248ef8f2a6d03eeb2a80d0123dbaf9b6', - '029c475ce01b58fdf1269d8771d33913', + "https://imgur.com/gallery/IjJJdlC", + ("740b006cf9ec9d6f734b6e8f5130bdab",), + ), + ( + "https://imgur.com/a/dcc84Gt", + ( + "cf1158e1de5c3c8993461383b96610cf", + "28d6b791a2daef8aa363bf5a3198535d", + "248ef8f2a6d03eeb2a80d0123dbaf9b6", + "029c475ce01b58fdf1269d8771d33913", + ), + ), + ( + "https://imgur.com/a/eemHCCK", + ( + "9cb757fd8f055e7ef7aa88addc9d9fa5", + "b6cb6c918e2544e96fb7c07d828774b5", + "fb6c913d721c0bbb96aa65d7f560d385", + ), + ), + ( + "https://i.imgur.com/lFJai6i.gifv", + ("01a6e79a30bec0e644e5da12365d5071",), + ), + ( + "https://i.imgur.com/ywSyILa.gifv?", + ("56d4afc32d2966017c38d98568709b45",), + ), + ( + "https://imgur.com/ubYwpbk.GIFV", + ("d4a774aac1667783f9ed3a1bd02fac0c",), + ), + ( + "https://i.imgur.com/j1CNCZY.gifv", + ("58e7e6d972058c18b7ecde910ca147e3",), + ), + ( + "https://i.imgur.com/uTvtQsw.gifv", + ("46c86533aa60fc0e09f2a758513e3ac2",), + ), + ( + "https://i.imgur.com/OGeVuAe.giff", + ("77389679084d381336f168538793f218",), + ), + ( + "https://i.imgur.com/OGeVuAe.gift", + ("77389679084d381336f168538793f218",), + ), + ( + "https://i.imgur.com/3SKrQfK.jpg?1", + ("aa299e181b268578979cad176d1bd1d0",), + ), + ( + "https://i.imgur.com/cbivYRW.jpg?3", + ("7ec6ceef5380cb163a1d498c359c51fd",), + ), + ( + "http://i.imgur.com/s9uXxlq.jpg?5.jpg", + ("338de3c23ee21af056b3a7c154e2478f",), ), ), - ( - 'https://imgur.com/a/eemHCCK', - ( - '9cb757fd8f055e7ef7aa88addc9d9fa5', - 'b6cb6c918e2544e96fb7c07d828774b5', - 'fb6c913d721c0bbb96aa65d7f560d385', - ), - ), - ( - 'https://i.imgur.com/lFJai6i.gifv', - ('01a6e79a30bec0e644e5da12365d5071',), - ), - ( - 'https://i.imgur.com/ywSyILa.gifv?', - ('56d4afc32d2966017c38d98568709b45',), - ), - ( - 'https://imgur.com/ubYwpbk.GIFV', - ('d4a774aac1667783f9ed3a1bd02fac0c',), - ), - ( - 'https://i.imgur.com/j1CNCZY.gifv', - ('58e7e6d972058c18b7ecde910ca147e3',), - ), - ( - 'https://i.imgur.com/uTvtQsw.gifv', - 
('46c86533aa60fc0e09f2a758513e3ac2',), - ), - ( - 'https://i.imgur.com/OGeVuAe.giff', - ('77389679084d381336f168538793f218',), - ), - ( - 'https://i.imgur.com/OGeVuAe.gift', - ('77389679084d381336f168538793f218',), - ), - ( - 'https://i.imgur.com/3SKrQfK.jpg?1', - ('aa299e181b268578979cad176d1bd1d0',), - ), - ( - 'https://i.imgur.com/cbivYRW.jpg?3', - ('7ec6ceef5380cb163a1d498c359c51fd',), - ), - ( - 'http://i.imgur.com/s9uXxlq.jpg?5.jpg', - ('338de3c23ee21af056b3a7c154e2478f',), - ), -)) +) def test_find_resources(test_url: str, expected_hashes: list[str]): mock_download = Mock() mock_download.url = test_url diff --git a/tests/site_downloaders/test_pornhub.py b/tests/site_downloaders/test_pornhub.py index e0933b0..42ca5a0 100644 --- a/tests/site_downloaders/test_pornhub.py +++ b/tests/site_downloaders/test_pornhub.py @@ -12,9 +12,10 @@ from bdfr.site_downloaders.pornhub import PornHub @pytest.mark.online @pytest.mark.slow -@pytest.mark.parametrize(('test_url', 'expected_hash'), ( - ('https://www.pornhub.com/view_video.php?viewkey=ph6074c59798497', 'ad52a0f4fce8f99df0abed17de1d04c7'), -)) +@pytest.mark.parametrize( + ("test_url", "expected_hash"), + (("https://www.pornhub.com/view_video.php?viewkey=ph6074c59798497", "ad52a0f4fce8f99df0abed17de1d04c7"),), +) def test_hash_resources_good(test_url: str, expected_hash: str): test_submission = MagicMock() test_submission.url = test_url @@ -27,9 +28,7 @@ def test_hash_resources_good(test_url: str, expected_hash: str): @pytest.mark.online -@pytest.mark.parametrize('test_url', ( - 'https://www.pornhub.com/view_video.php?viewkey=ph5ede121f0d3f8', -)) +@pytest.mark.parametrize("test_url", ("https://www.pornhub.com/view_video.php?viewkey=ph5ede121f0d3f8",)) def test_find_resources_good(test_url: str): test_submission = MagicMock() test_submission.url = test_url diff --git a/tests/site_downloaders/test_redgifs.py b/tests/site_downloaders/test_redgifs.py index 9a6d132..0e1a497 100644 --- a/tests/site_downloaders/test_redgifs.py +++ b/tests/site_downloaders/test_redgifs.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 # coding=utf-8 -from unittest.mock import Mock import re +from unittest.mock import Mock import pytest @@ -11,45 +11,55 @@ from bdfr.site_downloaders.redgifs import Redgifs @pytest.mark.online -@pytest.mark.parametrize(('test_url', 'expected'), ( - ('https://redgifs.com/watch/frighteningvictorioussalamander', - {'FrighteningVictoriousSalamander.mp4'}), - ('https://redgifs.com/watch/springgreendecisivetaruca', - {'SpringgreenDecisiveTaruca.mp4'}), - ('https://www.redgifs.com/watch/palegoldenrodrawhalibut', - {'PalegoldenrodRawHalibut.mp4'}), - ('https://redgifs.com/watch/hollowintentsnowyowl', - {'HollowIntentSnowyowl-large.jpg'}), - ('https://www.redgifs.com/watch/lustrousstickywaxwing', - {'EntireEnchantingHypsilophodon-large.jpg', - 'FancyMagnificentAdamsstaghornedbeetle-large.jpg', - 'LustrousStickyWaxwing-large.jpg', - 'ParchedWindyArmyworm-large.jpg', - 'ThunderousColorlessErmine-large.jpg', - 'UnripeUnkemptWoodpecker-large.jpg'}), -)) +@pytest.mark.parametrize( + ("test_url", "expected"), + ( + ("https://redgifs.com/watch/frighteningvictorioussalamander", {"FrighteningVictoriousSalamander.mp4"}), + ("https://redgifs.com/watch/springgreendecisivetaruca", {"SpringgreenDecisiveTaruca.mp4"}), + ("https://www.redgifs.com/watch/palegoldenrodrawhalibut", {"PalegoldenrodRawHalibut.mp4"}), + ("https://redgifs.com/watch/hollowintentsnowyowl", {"HollowIntentSnowyowl-large.jpg"}), + ( + "https://www.redgifs.com/watch/lustrousstickywaxwing", + { + 
"EntireEnchantingHypsilophodon-large.jpg", + "FancyMagnificentAdamsstaghornedbeetle-large.jpg", + "LustrousStickyWaxwing-large.jpg", + "ParchedWindyArmyworm-large.jpg", + "ThunderousColorlessErmine-large.jpg", + "UnripeUnkemptWoodpecker-large.jpg", + }, + ), + ), +) def test_get_link(test_url: str, expected: set[str]): result = Redgifs._get_link(test_url) result = list(result) - patterns = [r'https://thumbs\d\.redgifs\.com/' + e + r'.*' for e in expected] + patterns = [r"https://thumbs\d\.redgifs\.com/" + e + r".*" for e in expected] assert all([re.match(p, r) for p in patterns] for r in result) @pytest.mark.online -@pytest.mark.parametrize(('test_url', 'expected_hashes'), ( - ('https://redgifs.com/watch/frighteningvictorioussalamander', {'4007c35d9e1f4b67091b5f12cffda00a'}), - ('https://redgifs.com/watch/springgreendecisivetaruca', {'8dac487ac49a1f18cc1b4dabe23f0869'}), - ('https://redgifs.com/watch/leafysaltydungbeetle', {'076792c660b9c024c0471ef4759af8bd'}), - ('https://www.redgifs.com/watch/palegoldenrodrawhalibut', {'46d5aa77fe80c6407de1ecc92801c10e'}), - ('https://redgifs.com/watch/hollowintentsnowyowl', {'5ee51fa15e0a58e98f11dea6a6cca771'}), - ('https://www.redgifs.com/watch/lustrousstickywaxwing', - {'b461e55664f07bed8d2f41d8586728fa', - '30ba079a8ed7d7adf17929dc3064c10f', - '0d4f149d170d29fc2f015c1121bab18b', - '53987d99cfd77fd65b5fdade3718f9f1', - 'fb2e7d972846b83bf4016447d3060d60', - '44fb28f72ec9a5cca63fa4369ab4f672'}), -)) +@pytest.mark.parametrize( + ("test_url", "expected_hashes"), + ( + ("https://redgifs.com/watch/frighteningvictorioussalamander", {"4007c35d9e1f4b67091b5f12cffda00a"}), + ("https://redgifs.com/watch/springgreendecisivetaruca", {"8dac487ac49a1f18cc1b4dabe23f0869"}), + ("https://redgifs.com/watch/leafysaltydungbeetle", {"076792c660b9c024c0471ef4759af8bd"}), + ("https://www.redgifs.com/watch/palegoldenrodrawhalibut", {"46d5aa77fe80c6407de1ecc92801c10e"}), + ("https://redgifs.com/watch/hollowintentsnowyowl", {"5ee51fa15e0a58e98f11dea6a6cca771"}), + ( + "https://www.redgifs.com/watch/lustrousstickywaxwing", + { + "b461e55664f07bed8d2f41d8586728fa", + "30ba079a8ed7d7adf17929dc3064c10f", + "0d4f149d170d29fc2f015c1121bab18b", + "53987d99cfd77fd65b5fdade3718f9f1", + "fb2e7d972846b83bf4016447d3060d60", + "44fb28f72ec9a5cca63fa4369ab4f672", + }, + ), + ), +) def test_download_resource(test_url: str, expected_hashes: set[str]): mock_submission = Mock() mock_submission.url = test_url @@ -62,18 +72,30 @@ def test_download_resource(test_url: str, expected_hashes: set[str]): @pytest.mark.online -@pytest.mark.parametrize(('test_url', 'expected_link', 'expected_hash'), ( - ('https://redgifs.com/watch/flippantmemorablebaiji', {'FlippantMemorableBaiji-mobile.mp4'}, - {'41a5fb4865367ede9f65fc78736f497a'}), - ('https://redgifs.com/watch/thirstyunfortunatewaterdragons', {'thirstyunfortunatewaterdragons-mobile.mp4'}, - {'1a51dad8fedb594bdd84f027b3cbe8af'}), - ('https://redgifs.com/watch/conventionalplainxenopterygii', {'conventionalplainxenopterygii-mobile.mp4'}, - {'2e1786b3337da85b80b050e2c289daa4'}) -)) +@pytest.mark.parametrize( + ("test_url", "expected_link", "expected_hash"), + ( + ( + "https://redgifs.com/watch/flippantmemorablebaiji", + {"FlippantMemorableBaiji-mobile.mp4"}, + {"41a5fb4865367ede9f65fc78736f497a"}, + ), + ( + "https://redgifs.com/watch/thirstyunfortunatewaterdragons", + {"thirstyunfortunatewaterdragons-mobile.mp4"}, + {"1a51dad8fedb594bdd84f027b3cbe8af"}, + ), + ( + "https://redgifs.com/watch/conventionalplainxenopterygii", + 
{"conventionalplainxenopterygii-mobile.mp4"}, + {"2e1786b3337da85b80b050e2c289daa4"}, + ), + ), +) def test_hd_soft_fail(test_url: str, expected_link: set[str], expected_hash: set[str]): link = Redgifs._get_link(test_url) link = list(link) - patterns = [r'https://thumbs\d\.redgifs\.com/' + e + r'.*' for e in expected_link] + patterns = [r"https://thumbs\d\.redgifs\.com/" + e + r".*" for e in expected_link] assert all([re.match(p, r) for p in patterns] for r in link) mock_submission = Mock() mock_submission.url = test_url diff --git a/tests/site_downloaders/test_self_post.py b/tests/site_downloaders/test_self_post.py index e3363bb..104fb3b 100644 --- a/tests/site_downloaders/test_self_post.py +++ b/tests/site_downloaders/test_self_post.py @@ -10,11 +10,14 @@ from bdfr.site_downloaders.self_post import SelfPost @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_submission_id', 'expected_hash'), ( - ('ltmivt', '7d2c9e4e989e5cf2dca2e55a06b1c4f6'), - ('ltoaan', '221606386b614d6780c2585a59bd333f'), - ('d3sc8o', 'c1ff2b6bd3f6b91381dcd18dfc4ca35f'), -)) +@pytest.mark.parametrize( + ("test_submission_id", "expected_hash"), + ( + ("ltmivt", "7d2c9e4e989e5cf2dca2e55a06b1c4f6"), + ("ltoaan", "221606386b614d6780c2585a59bd333f"), + ("d3sc8o", "c1ff2b6bd3f6b91381dcd18dfc4ca35f"), + ), +) def test_find_resource(test_submission_id: str, expected_hash: str, reddit_instance: praw.Reddit): submission = reddit_instance.submission(id=test_submission_id) downloader = SelfPost(submission) diff --git a/tests/site_downloaders/test_vidble.py b/tests/site_downloaders/test_vidble.py index f6ddd56..16b5a3b 100644 --- a/tests/site_downloaders/test_vidble.py +++ b/tests/site_downloaders/test_vidble.py @@ -8,55 +8,83 @@ from bdfr.resource import Resource from bdfr.site_downloaders.vidble import Vidble -@pytest.mark.parametrize(('test_url', 'expected'), ( - ('/RDFbznUvcN_med.jpg', '/RDFbznUvcN.jpg'), -)) +@pytest.mark.parametrize(("test_url", "expected"), (("/RDFbznUvcN_med.jpg", "/RDFbznUvcN.jpg"),)) def test_change_med_url(test_url: str, expected: str): result = Vidble.change_med_url(test_url) assert result == expected @pytest.mark.online -@pytest.mark.parametrize(('test_url', 'expected'), ( - ('https://www.vidble.com/show/UxsvAssYe5', { - 'https://www.vidble.com/UxsvAssYe5.gif', - }), - ('https://vidble.com/show/RDFbznUvcN', { - 'https://www.vidble.com/RDFbznUvcN.jpg', - }), - ('https://vidble.com/album/h0jTLs6B', { - 'https://www.vidble.com/XG4eAoJ5JZ.jpg', - 'https://www.vidble.com/IqF5UdH6Uq.jpg', - 'https://www.vidble.com/VWuNsnLJMD.jpg', - 'https://www.vidble.com/sMmM8O650W.jpg', - }), - ('https://www.vidble.com/pHuwWkOcEb', { - 'https://www.vidble.com/pHuwWkOcEb.jpg', - }), -)) +@pytest.mark.parametrize( + ("test_url", "expected"), + ( + ( + "https://www.vidble.com/show/UxsvAssYe5", + { + "https://www.vidble.com/UxsvAssYe5.gif", + }, + ), + ( + "https://vidble.com/show/RDFbznUvcN", + { + "https://www.vidble.com/RDFbznUvcN.jpg", + }, + ), + ( + "https://vidble.com/album/h0jTLs6B", + { + "https://www.vidble.com/XG4eAoJ5JZ.jpg", + "https://www.vidble.com/IqF5UdH6Uq.jpg", + "https://www.vidble.com/VWuNsnLJMD.jpg", + "https://www.vidble.com/sMmM8O650W.jpg", + }, + ), + ( + "https://www.vidble.com/pHuwWkOcEb", + { + "https://www.vidble.com/pHuwWkOcEb.jpg", + }, + ), + ), +) def test_get_links(test_url: str, expected: set[str]): results = Vidble.get_links(test_url) assert results == expected @pytest.mark.online -@pytest.mark.parametrize(('test_url', 'expected_hashes'), ( - 
('https://www.vidble.com/show/UxsvAssYe5', { - '0ef2f8e0e0b45936d2fb3e6fbdf67e28', - }), - ('https://vidble.com/show/RDFbznUvcN', { - 'c2dd30a71e32369c50eed86f86efff58', - }), - ('https://vidble.com/album/h0jTLs6B', { - '3b3cba02e01c91f9858a95240b942c71', - 'dd6ecf5fc9e936f9fb614eb6a0537f99', - 'b31a942cd8cdda218ed547bbc04c3a27', - '6f77c570b451eef4222804bd52267481', - }), - ('https://www.vidble.com/pHuwWkOcEb', { - '585f486dd0b2f23a57bddbd5bf185bc7', - }), -)) +@pytest.mark.parametrize( + ("test_url", "expected_hashes"), + ( + ( + "https://www.vidble.com/show/UxsvAssYe5", + { + "0ef2f8e0e0b45936d2fb3e6fbdf67e28", + }, + ), + ( + "https://vidble.com/show/RDFbznUvcN", + { + "c2dd30a71e32369c50eed86f86efff58", + }, + ), + ( + "https://vidble.com/album/h0jTLs6B", + { + "3b3cba02e01c91f9858a95240b942c71", + "dd6ecf5fc9e936f9fb614eb6a0537f99", + "b31a942cd8cdda218ed547bbc04c3a27", + "6f77c570b451eef4222804bd52267481", + }, + ), + ( + "https://www.vidble.com/pHuwWkOcEb", + { + "585f486dd0b2f23a57bddbd5bf185bc7", + }, + ), + ), +) def test_find_resources(test_url: str, expected_hashes: set[str]): mock_download = Mock() mock_download.url = test_url diff --git a/tests/site_downloaders/test_vreddit.py b/tests/site_downloaders/test_vreddit.py index 54ffcf8..6e79ba0 100644 --- a/tests/site_downloaders/test_vreddit.py +++ b/tests/site_downloaders/test_vreddit.py @@ -12,9 +12,10 @@ from bdfr.site_downloaders.vreddit import VReddit @pytest.mark.online @pytest.mark.slow -@pytest.mark.parametrize(('test_url', 'expected_hash'), ( - ('https://reddit.com/r/Unexpected/comments/z4xsuj/omg_thats_so_cute/', '1ffab5e5c0cc96db18108e4f37e8ca7f'), -)) +@pytest.mark.parametrize( + ("test_url", "expected_hash"), + (("https://reddit.com/r/Unexpected/comments/z4xsuj/omg_thats_so_cute/", "1ffab5e5c0cc96db18108e4f37e8ca7f"),), +) def test_find_resources_good(test_url: str, expected_hash: str): test_submission = MagicMock() test_submission.url = test_url @@ -27,10 +28,13 @@ def test_find_resources_good(test_url: str, expected_hash: str): @pytest.mark.online -@pytest.mark.parametrize('test_url', ( - 'https://www.polygon.com/disney-plus/2020/5/14/21249881/gargoyles-animated-series-disney-plus-greg-weisman' - '-interview-oj-simpson-goliath-chronicles', -)) +@pytest.mark.parametrize( + "test_url", + ( + "https://www.polygon.com/disney-plus/2020/5/14/21249881/gargoyles-animated-series-disney-plus-greg-weisman" + "-interview-oj-simpson-goliath-chronicles", + ), +) def test_find_resources_bad(test_url: str): test_submission = MagicMock() test_submission.url = test_url diff --git a/tests/site_downloaders/test_youtube.py b/tests/site_downloaders/test_youtube.py index 14c6648..7a45a3c 100644 --- a/tests/site_downloaders/test_youtube.py +++ b/tests/site_downloaders/test_youtube.py @@ -12,10 +12,13 @@ from bdfr.site_downloaders.youtube import Youtube @pytest.mark.online @pytest.mark.slow -@pytest.mark.parametrize(('test_url', 'expected_hash'), ( - ('https://www.youtube.com/watch?v=uSm2VDgRIUs', '2d60b54582df5b95ec72bb00b580d2ff'), - ('https://www.youtube.com/watch?v=GcI7nxQj7HA', '5db0fc92a0a7fb9ac91e63505eea9cf0'), -)) +@pytest.mark.parametrize( + ("test_url", "expected_hash"), + ( + ("https://www.youtube.com/watch?v=uSm2VDgRIUs", "2d60b54582df5b95ec72bb00b580d2ff"), + ("https://www.youtube.com/watch?v=GcI7nxQj7HA", "5db0fc92a0a7fb9ac91e63505eea9cf0"), + ), +) def test_find_resources_good(test_url: str, expected_hash: str): test_submission = MagicMock() test_submission.url = test_url @@ -28,10 +31,13 @@ def 
test_find_resources_good(test_url: str, expected_hash: str): @pytest.mark.online -@pytest.mark.parametrize('test_url', ( - 'https://www.polygon.com/disney-plus/2020/5/14/21249881/gargoyles-animated-series-disney-plus-greg-weisman' - '-interview-oj-simpson-goliath-chronicles', -)) +@pytest.mark.parametrize( + "test_url", + ( + "https://www.polygon.com/disney-plus/2020/5/14/21249881/gargoyles-animated-series-disney-plus-greg-weisman" + "-interview-oj-simpson-goliath-chronicles", + ), +) def test_find_resources_bad(test_url: str): test_submission = MagicMock() test_submission.url = test_url diff --git a/tests/test_archiver.py b/tests/test_archiver.py index 627caee..932a2ab 100644 --- a/tests/test_archiver.py +++ b/tests/test_archiver.py @@ -12,15 +12,18 @@ from bdfr.archiver import Archiver @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_submission_id', 'test_format'), ( - ('m3reby', 'xml'), - ('m3reby', 'json'), - ('m3reby', 'yaml'), -)) +@pytest.mark.parametrize( + ("test_submission_id", "test_format"), + ( + ("m3reby", "xml"), + ("m3reby", "json"), + ("m3reby", "yaml"), + ), +) def test_write_submission_json(test_submission_id: str, tmp_path: Path, test_format: str, reddit_instance: praw.Reddit): archiver_mock = MagicMock() archiver_mock.args.format = test_format - test_path = Path(tmp_path, 'test') + test_path = Path(tmp_path, "test") test_submission = reddit_instance.submission(id=test_submission_id) archiver_mock.file_name_formatter.format_path.return_value = test_path Archiver.write_entry(archiver_mock, test_submission) diff --git a/tests/test_configuration.py b/tests/test_configuration.py index 060f145..652c401 100644 --- a/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -8,13 +8,16 @@ import pytest from bdfr.configuration import Configuration -@pytest.mark.parametrize('arg_dict', ( - {'directory': 'test_dir'}, - { - 'directory': 'test_dir', - 'no_dupes': True, - }, -)) +@pytest.mark.parametrize( + "arg_dict", + ( + {"directory": "test_dir"}, + { + "directory": "test_dir", + "no_dupes": True, + }, + ), +) def test_process_click_context(arg_dict: dict): test_config = Configuration() test_context = MagicMock() @@ -25,9 +28,9 @@ def test_process_click_context(arg_dict: dict): def test_yaml_file_read(): - file = './tests/yaml_test_configuration.yaml' + file = "./tests/yaml_test_configuration.yaml" test_config = Configuration() test_config.parse_yaml_options(file) - assert test_config.subreddit == ['EarthPorn', 'TwoXChromosomes', 'Mindustry'] - assert test_config.sort == 'new' + assert test_config.subreddit == ["EarthPorn", "TwoXChromosomes", "Mindustry"] + assert test_config.sort == "new" assert test_config.limit == 10 diff --git a/tests/test_connector.py b/tests/test_connector.py index 4c9e52d..01b6a92 100644 --- a/tests/test_connector.py +++ b/tests/test_connector.py @@ -20,7 +20,7 @@ from bdfr.site_authenticator import SiteAuthenticator @pytest.fixture() def args() -> Configuration: args = Configuration() - args.time_format = 'ISO' + args.time_format = "ISO" return args @@ -30,7 +30,8 @@ def downloader_mock(args: Configuration): downloader_mock.args = args downloader_mock.sanitise_subreddit_name = RedditConnector.sanitise_subreddit_name downloader_mock.create_filtered_listing_generator = lambda x: RedditConnector.create_filtered_listing_generator( - downloader_mock, x) + downloader_mock, x + ) downloader_mock.split_args_input = RedditConnector.split_args_input downloader_mock.master_hash_list = {} return downloader_mock @@ -55,16 +56,22 @@ def 
assert_all_results_are_submissions_or_comments(result_limit: int, results: l def test_determine_directories(tmp_path: Path, downloader_mock: MagicMock): - downloader_mock.args.directory = tmp_path / 'test' + downloader_mock.args.directory = tmp_path / "test" downloader_mock.config_directories.user_config_dir = tmp_path RedditConnector.determine_directories(downloader_mock) - assert Path(tmp_path / 'test').exists() + assert Path(tmp_path / "test").exists() -@pytest.mark.parametrize(('skip_extensions', 'skip_domains'), ( - ([], []), - (['.test'], ['test.com'],), -)) +@pytest.mark.parametrize( + ("skip_extensions", "skip_domains"), + ( + ([], []), + ( + [".test"], + ["test.com"], + ), + ), +) def test_create_download_filter(skip_extensions: list[str], skip_domains: list[str], downloader_mock: MagicMock): downloader_mock.args.skip = skip_extensions downloader_mock.args.skip_domain = skip_domains @@ -75,14 +82,17 @@ def test_create_download_filter(skip_extensions: list[str], skip_domains: list[s assert result.excluded_extensions == skip_extensions -@pytest.mark.parametrize(('test_time', 'expected'), ( - ('all', 'all'), - ('hour', 'hour'), - ('day', 'day'), - ('week', 'week'), - ('random', 'all'), - ('', 'all'), -)) +@pytest.mark.parametrize( + ("test_time", "expected"), + ( + ("all", "all"), + ("hour", "hour"), + ("day", "day"), + ("week", "week"), + ("random", "all"), + ("", "all"), + ), +) def test_create_time_filter(test_time: str, expected: str, downloader_mock: MagicMock): downloader_mock.args.time = test_time result = RedditConnector.create_time_filter(downloader_mock) @@ -91,12 +101,15 @@ def test_create_time_filter(test_time: str, expected: str, downloader_mock: Magi assert result.name.lower() == expected -@pytest.mark.parametrize(('test_sort', 'expected'), ( - ('', 'hot'), - ('hot', 'hot'), - ('controversial', 'controversial'), - ('new', 'new'), -)) +@pytest.mark.parametrize( + ("test_sort", "expected"), + ( + ("", "hot"), + ("hot", "hot"), + ("controversial", "controversial"), + ("new", "new"), + ), +) def test_create_sort_filter(test_sort: str, expected: str, downloader_mock: MagicMock): downloader_mock.args.sort = test_sort result = RedditConnector.create_sort_filter(downloader_mock) @@ -105,13 +118,16 @@ def test_create_sort_filter(test_sort: str, expected: str, downloader_mock: Magi assert result.name.lower() == expected -@pytest.mark.parametrize(('test_file_scheme', 'test_folder_scheme'), ( - ('{POSTID}', '{SUBREDDIT}'), - ('{REDDITOR}_{TITLE}_{POSTID}', '{SUBREDDIT}'), - ('{POSTID}', 'test'), - ('{POSTID}', ''), - ('{POSTID}', '{SUBREDDIT}/{REDDITOR}'), -)) +@pytest.mark.parametrize( + ("test_file_scheme", "test_folder_scheme"), + ( + ("{POSTID}", "{SUBREDDIT}"), + ("{REDDITOR}_{TITLE}_{POSTID}", "{SUBREDDIT}"), + ("{POSTID}", "test"), + ("{POSTID}", ""), + ("{POSTID}", "{SUBREDDIT}/{REDDITOR}"), + ), +) def test_create_file_name_formatter(test_file_scheme: str, test_folder_scheme: str, downloader_mock: MagicMock): downloader_mock.args.file_scheme = test_file_scheme downloader_mock.args.folder_scheme = test_folder_scheme @@ -119,14 +135,17 @@ def test_create_file_name_formatter(test_file_scheme: str, test_folder_scheme: s assert isinstance(result, FileNameFormatter) assert result.file_format_string == test_file_scheme - assert result.directory_format_string == test_folder_scheme.split('/') + assert result.directory_format_string == test_folder_scheme.split("/") -@pytest.mark.parametrize(('test_file_scheme', 'test_folder_scheme'), ( - ('', ''), - ('', '{SUBREDDIT}'), - ('test', 
'{SUBREDDIT}'), -)) +@pytest.mark.parametrize( + ("test_file_scheme", "test_folder_scheme"), + ( + ("", ""), + ("", "{SUBREDDIT}"), + ("test", "{SUBREDDIT}"), + ), +) def test_create_file_name_formatter_bad(test_file_scheme: str, test_folder_scheme: str, downloader_mock: MagicMock): downloader_mock.args.file_scheme = test_file_scheme downloader_mock.args.folder_scheme = test_folder_scheme @@ -141,15 +160,17 @@ def test_create_authenticator(downloader_mock: MagicMock): @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize('test_submission_ids', ( - ('lvpf4l',), - ('lvpf4l', 'lvqnsn'), - ('lvpf4l', 'lvqnsn', 'lvl9kd'), -)) +@pytest.mark.parametrize( + "test_submission_ids", + ( + ("lvpf4l",), + ("lvpf4l", "lvqnsn"), + ("lvpf4l", "lvqnsn", "lvl9kd"), + ), +) def test_get_submissions_from_link( - test_submission_ids: list[str], - reddit_instance: praw.Reddit, - downloader_mock: MagicMock): + test_submission_ids: list[str], reddit_instance: praw.Reddit, downloader_mock: MagicMock +): downloader_mock.args.link = test_submission_ids downloader_mock.reddit_instance = reddit_instance results = RedditConnector.get_submissions_from_link(downloader_mock) @@ -159,25 +180,28 @@ def test_get_submissions_from_link( @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_subreddits', 'limit', 'sort_type', 'time_filter', 'max_expected_len'), ( - (('Futurology',), 10, 'hot', 'all', 10), - (('Futurology', 'Mindustry, Python'), 10, 'hot', 'all', 30), - (('Futurology',), 20, 'hot', 'all', 20), - (('Futurology', 'Python'), 10, 'hot', 'all', 20), - (('Futurology',), 100, 'hot', 'all', 100), - (('Futurology',), 0, 'hot', 'all', 0), - (('Futurology',), 10, 'top', 'all', 10), - (('Futurology',), 10, 'top', 'week', 10), - (('Futurology',), 10, 'hot', 'week', 10), -)) +@pytest.mark.parametrize( + ("test_subreddits", "limit", "sort_type", "time_filter", "max_expected_len"), + ( + (("Futurology",), 10, "hot", "all", 10), + (("Futurology", "Mindustry, Python"), 10, "hot", "all", 30), + (("Futurology",), 20, "hot", "all", 20), + (("Futurology", "Python"), 10, "hot", "all", 20), + (("Futurology",), 100, "hot", "all", 100), + (("Futurology",), 0, "hot", "all", 0), + (("Futurology",), 10, "top", "all", 10), + (("Futurology",), 10, "top", "week", 10), + (("Futurology",), 10, "hot", "week", 10), + ), +) def test_get_subreddit_normal( - test_subreddits: list[str], - limit: int, - sort_type: str, - time_filter: str, - max_expected_len: int, - downloader_mock: MagicMock, - reddit_instance: praw.Reddit, + test_subreddits: list[str], + limit: int, + sort_type: str, + time_filter: str, + max_expected_len: int, + downloader_mock: MagicMock, + reddit_instance: praw.Reddit, ): downloader_mock.args.limit = limit downloader_mock.args.sort = sort_type @@ -197,26 +221,29 @@ def test_get_subreddit_normal( @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_time', 'test_delta'), ( - ('hour', timedelta(hours=1)), - ('day', timedelta(days=1)), - ('week', timedelta(days=7)), - ('month', timedelta(days=31)), - ('year', timedelta(days=365)), -)) +@pytest.mark.parametrize( + ("test_time", "test_delta"), + ( + ("hour", timedelta(hours=1)), + ("day", timedelta(days=1)), + ("week", timedelta(days=7)), + ("month", timedelta(days=31)), + ("year", timedelta(days=365)), + ), +) def test_get_subreddit_time_verification( - test_time: str, - test_delta: timedelta, - downloader_mock: MagicMock, - reddit_instance: praw.Reddit, + test_time: str, + test_delta: timedelta, + downloader_mock: MagicMock, + 
reddit_instance: praw.Reddit, ): downloader_mock.args.limit = 10 - downloader_mock.args.sort = 'top' + downloader_mock.args.sort = "top" downloader_mock.args.time = test_time downloader_mock.time_filter = RedditConnector.create_time_filter(downloader_mock) downloader_mock.sort_filter = RedditConnector.create_sort_filter(downloader_mock) downloader_mock.determine_sort_function.return_value = RedditConnector.determine_sort_function(downloader_mock) - downloader_mock.args.subreddit = ['all'] + downloader_mock.args.subreddit = ["all"] downloader_mock.reddit_instance = reddit_instance results = RedditConnector.get_subreddits(downloader_mock) results = [sub for res1 in results for sub in res1] @@ -230,20 +257,23 @@ def test_get_subreddit_time_verification( @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_subreddits', 'search_term', 'limit', 'time_filter', 'max_expected_len'), ( - (('Python',), 'scraper', 10, 'all', 10), - (('Python',), '', 10, 'all', 0), - (('Python',), 'djsdsgewef', 10, 'all', 0), - (('Python',), 'scraper', 10, 'year', 10), -)) +@pytest.mark.parametrize( + ("test_subreddits", "search_term", "limit", "time_filter", "max_expected_len"), + ( + (("Python",), "scraper", 10, "all", 10), + (("Python",), "", 10, "all", 0), + (("Python",), "djsdsgewef", 10, "all", 0), + (("Python",), "scraper", 10, "year", 10), + ), +) def test_get_subreddit_search( - test_subreddits: list[str], - search_term: str, - time_filter: str, - limit: int, - max_expected_len: int, - downloader_mock: MagicMock, - reddit_instance: praw.Reddit, + test_subreddits: list[str], + search_term: str, + time_filter: str, + limit: int, + max_expected_len: int, + downloader_mock: MagicMock, + reddit_instance: praw.Reddit, ): downloader_mock._determine_sort_function.return_value = praw.models.Subreddit.hot downloader_mock.args.limit = limit @@ -265,17 +295,20 @@ def test_get_subreddit_search( @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_user', 'test_multireddits', 'limit'), ( - ('helen_darten', ('cuteanimalpics',), 10), - ('korfor', ('chess',), 100), -)) +@pytest.mark.parametrize( + ("test_user", "test_multireddits", "limit"), + ( + ("helen_darten", ("cuteanimalpics",), 10), + ("korfor", ("chess",), 100), + ), +) # Good sources at https://www.reddit.com/r/multihub/ def test_get_multireddits_public( - test_user: str, - test_multireddits: list[str], - limit: int, - reddit_instance: praw.Reddit, - downloader_mock: MagicMock, + test_user: str, + test_multireddits: list[str], + limit: int, + reddit_instance: praw.Reddit, + downloader_mock: MagicMock, ): downloader_mock.determine_sort_function.return_value = praw.models.Subreddit.hot downloader_mock.sort_filter = RedditTypes.SortType.HOT @@ -283,11 +316,10 @@ def test_get_multireddits_public( downloader_mock.args.multireddit = test_multireddits downloader_mock.args.user = [test_user] downloader_mock.reddit_instance = reddit_instance - downloader_mock.create_filtered_listing_generator.return_value = \ - RedditConnector.create_filtered_listing_generator( - downloader_mock, - reddit_instance.multireddit(redditor=test_user, name=test_multireddits[0]), - ) + downloader_mock.create_filtered_listing_generator.return_value = RedditConnector.create_filtered_listing_generator( + downloader_mock, + reddit_instance.multireddit(redditor=test_user, name=test_multireddits[0]), + ) results = RedditConnector.get_multireddits(downloader_mock) results = [sub for res in results for sub in res] assert all([isinstance(res, praw.models.Submission) for 
res in results]) @@ -297,11 +329,14 @@ def test_get_multireddits_public( @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_user', 'limit'), ( - ('danigirl3694', 10), - ('danigirl3694', 50), - ('CapitanHam', None), -)) +@pytest.mark.parametrize( + ("test_user", "limit"), + ( + ("danigirl3694", 10), + ("danigirl3694", 50), + ("CapitanHam", None), + ), +) def test_get_user_submissions(test_user: str, limit: int, downloader_mock: MagicMock, reddit_instance: praw.Reddit): downloader_mock.args.limit = limit downloader_mock.determine_sort_function.return_value = praw.models.Subreddit.hot @@ -310,11 +345,10 @@ def test_get_user_submissions(test_user: str, limit: int, downloader_mock: Magic downloader_mock.args.user = [test_user] downloader_mock.authenticated = False downloader_mock.reddit_instance = reddit_instance - downloader_mock.create_filtered_listing_generator.return_value = \ - RedditConnector.create_filtered_listing_generator( - downloader_mock, - reddit_instance.redditor(test_user).submissions, - ) + downloader_mock.create_filtered_listing_generator.return_value = RedditConnector.create_filtered_listing_generator( + downloader_mock, + reddit_instance.redditor(test_user).submissions, + ) results = RedditConnector.get_user_data(downloader_mock) results = assert_all_results_are_submissions(limit, results) assert all([res.author.name == test_user for res in results]) @@ -324,21 +358,24 @@ def test_get_user_submissions(test_user: str, limit: int, downloader_mock: Magic @pytest.mark.online @pytest.mark.reddit @pytest.mark.authenticated -@pytest.mark.parametrize('test_flag', ( - 'upvoted', - 'saved', -)) +@pytest.mark.parametrize( + "test_flag", + ( + "upvoted", + "saved", + ), +) def test_get_user_authenticated_lists( - test_flag: str, - downloader_mock: MagicMock, - authenticated_reddit_instance: praw.Reddit, + test_flag: str, + downloader_mock: MagicMock, + authenticated_reddit_instance: praw.Reddit, ): downloader_mock.args.__dict__[test_flag] = True downloader_mock.reddit_instance = authenticated_reddit_instance downloader_mock.args.limit = 10 downloader_mock.determine_sort_function.return_value = praw.models.Subreddit.hot downloader_mock.sort_filter = RedditTypes.SortType.HOT - downloader_mock.args.user = [RedditConnector.resolve_user_name(downloader_mock, 'me')] + downloader_mock.args.user = [RedditConnector.resolve_user_name(downloader_mock, "me")] results = RedditConnector.get_user_data(downloader_mock) assert_all_results_are_submissions_or_comments(10, results) @@ -359,54 +396,63 @@ def test_get_subscribed_subreddits(downloader_mock: MagicMock, authenticated_red assert results -@pytest.mark.parametrize(('test_name', 'expected'), ( - ('Mindustry', 'Mindustry'), - ('Futurology', 'Futurology'), - ('r/Mindustry', 'Mindustry'), - ('TrollXChromosomes', 'TrollXChromosomes'), - ('r/TrollXChromosomes', 'TrollXChromosomes'), - ('https://www.reddit.com/r/TrollXChromosomes/', 'TrollXChromosomes'), - ('https://www.reddit.com/r/TrollXChromosomes', 'TrollXChromosomes'), - ('https://www.reddit.com/r/Futurology/', 'Futurology'), - ('https://www.reddit.com/r/Futurology', 'Futurology'), -)) +@pytest.mark.parametrize( + ("test_name", "expected"), + ( + ("Mindustry", "Mindustry"), + ("Futurology", "Futurology"), + ("r/Mindustry", "Mindustry"), + ("TrollXChromosomes", "TrollXChromosomes"), + ("r/TrollXChromosomes", "TrollXChromosomes"), + ("https://www.reddit.com/r/TrollXChromosomes/", "TrollXChromosomes"), + ("https://www.reddit.com/r/TrollXChromosomes", "TrollXChromosomes"), + 
("https://www.reddit.com/r/Futurology/", "Futurology"), + ("https://www.reddit.com/r/Futurology", "Futurology"), + ), +) def test_sanitise_subreddit_name(test_name: str, expected: str): result = RedditConnector.sanitise_subreddit_name(test_name) assert result == expected -@pytest.mark.parametrize(('test_subreddit_entries', 'expected'), ( - (['test1', 'test2', 'test3'], {'test1', 'test2', 'test3'}), - (['test1,test2', 'test3'], {'test1', 'test2', 'test3'}), - (['test1, test2', 'test3'], {'test1', 'test2', 'test3'}), - (['test1; test2', 'test3'], {'test1', 'test2', 'test3'}), - (['test1, test2', 'test1,test2,test3', 'test4'], {'test1', 'test2', 'test3', 'test4'}), - ([''], {''}), - (['test'], {'test'}), -)) +@pytest.mark.parametrize( + ("test_subreddit_entries", "expected"), + ( + (["test1", "test2", "test3"], {"test1", "test2", "test3"}), + (["test1,test2", "test3"], {"test1", "test2", "test3"}), + (["test1, test2", "test3"], {"test1", "test2", "test3"}), + (["test1; test2", "test3"], {"test1", "test2", "test3"}), + (["test1, test2", "test1,test2,test3", "test4"], {"test1", "test2", "test3", "test4"}), + ([""], {""}), + (["test"], {"test"}), + ), +) def test_split_subreddit_entries(test_subreddit_entries: list[str], expected: set[str]): results = RedditConnector.split_args_input(test_subreddit_entries) assert results == expected def test_read_submission_ids_from_file(downloader_mock: MagicMock, tmp_path: Path): - test_file = tmp_path / 'test.txt' - test_file.write_text('aaaaaa\nbbbbbb') + test_file = tmp_path / "test.txt" + test_file.write_text("aaaaaa\nbbbbbb") results = RedditConnector.read_id_files([str(test_file)]) - assert results == {'aaaaaa', 'bbbbbb'} + assert results == {"aaaaaa", "bbbbbb"} @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize('test_redditor_name', ( - 'nasa', - 'crowdstrike', - 'HannibalGoddamnit', -)) +@pytest.mark.parametrize( + "test_redditor_name", + ( + "nasa", + "crowdstrike", + "HannibalGoddamnit", + ), +) def test_check_user_existence_good( - test_redditor_name: str, - reddit_instance: praw.Reddit, - downloader_mock: MagicMock, + test_redditor_name: str, + reddit_instance: praw.Reddit, + downloader_mock: MagicMock, ): downloader_mock.reddit_instance = reddit_instance RedditConnector.check_user_existence(downloader_mock, test_redditor_name) @@ -414,42 +460,46 @@ def test_check_user_existence_good( @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize('test_redditor_name', ( - 'lhnhfkuhwreolo', - 'adlkfmnhglojh', -)) +@pytest.mark.parametrize( + "test_redditor_name", + ( + "lhnhfkuhwreolo", + "adlkfmnhglojh", + ), +) def test_check_user_existence_nonexistent( - test_redditor_name: str, - reddit_instance: praw.Reddit, - downloader_mock: MagicMock, + test_redditor_name: str, + reddit_instance: praw.Reddit, + downloader_mock: MagicMock, ): downloader_mock.reddit_instance = reddit_instance - with pytest.raises(BulkDownloaderException, match='Could not find'): + with pytest.raises(BulkDownloaderException, match="Could not find"): RedditConnector.check_user_existence(downloader_mock, test_redditor_name) @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize('test_redditor_name', ( - 'Bree-Boo', -)) +@pytest.mark.parametrize("test_redditor_name", ("Bree-Boo",)) def test_check_user_existence_banned( - test_redditor_name: str, - reddit_instance: praw.Reddit, - downloader_mock: MagicMock, + test_redditor_name: str, + reddit_instance: praw.Reddit, + downloader_mock: MagicMock, ): downloader_mock.reddit_instance = reddit_instance - 
with pytest.raises(BulkDownloaderException, match='is banned'): + with pytest.raises(BulkDownloaderException, match="is banned"): RedditConnector.check_user_existence(downloader_mock, test_redditor_name) @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_subreddit_name', 'expected_message'), ( - ('donaldtrump', 'cannot be found'), - ('submitters', 'private and cannot be scraped'), - ('lhnhfkuhwreolo', 'does not exist') -)) +@pytest.mark.parametrize( + ("test_subreddit_name", "expected_message"), + ( + ("donaldtrump", "cannot be found"), + ("submitters", "private and cannot be scraped"), + ("lhnhfkuhwreolo", "does not exist"), + ), +) def test_check_subreddit_status_bad(test_subreddit_name: str, expected_message: str, reddit_instance: praw.Reddit): test_subreddit = reddit_instance.subreddit(test_subreddit_name) with pytest.raises(BulkDownloaderException, match=expected_message): @@ -458,12 +508,15 @@ def test_check_subreddit_status_bad(test_subreddit_name: str, expected_message: @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize('test_subreddit_name', ( - 'Python', - 'Mindustry', - 'TrollXChromosomes', - 'all', -)) +@pytest.mark.parametrize( + "test_subreddit_name", + ( + "Python", + "Mindustry", + "TrollXChromosomes", + "all", + ), +) def test_check_subreddit_status_good(test_subreddit_name: str, reddit_instance: praw.Reddit): test_subreddit = reddit_instance.subreddit(test_subreddit_name) RedditConnector.check_subreddit_status(test_subreddit) diff --git a/tests/test_download_filter.py b/tests/test_download_filter.py index ce1b260..07b7d67 100644 --- a/tests/test_download_filter.py +++ b/tests/test_download_filter.py @@ -11,55 +11,67 @@ from bdfr.resource import Resource @pytest.fixture() def download_filter() -> DownloadFilter: - return DownloadFilter(['mp4', 'mp3'], ['test.com', 'reddit.com', 'img.example.com']) + return DownloadFilter(["mp4", "mp3"], ["test.com", "reddit.com", "img.example.com"]) -@pytest.mark.parametrize(('test_extension', 'expected'), ( - ('.mp4', False), - ('.avi', True), - ('.random.mp3', False), - ('mp4', False), -)) +@pytest.mark.parametrize( + ("test_extension", "expected"), + ( + (".mp4", False), + (".avi", True), + (".random.mp3", False), + ("mp4", False), + ), +) def test_filter_extension(test_extension: str, expected: bool, download_filter: DownloadFilter): result = download_filter._check_extension(test_extension) assert result == expected -@pytest.mark.parametrize(('test_url', 'expected'), ( - ('test.mp4', True), - ('http://reddit.com/test.mp4', False), - ('http://reddit.com/test.gif', False), - ('https://www.example.com/test.mp4', True), - ('https://www.example.com/test.png', True), - ('https://i.example.com/test.png', True), - ('https://img.example.com/test.png', False), - ('https://i.test.com/test.png', False), -)) +@pytest.mark.parametrize( + ("test_url", "expected"), + ( + ("test.mp4", True), + ("http://reddit.com/test.mp4", False), + ("http://reddit.com/test.gif", False), + ("https://www.example.com/test.mp4", True), + ("https://www.example.com/test.png", True), + ("https://i.example.com/test.png", True), + ("https://img.example.com/test.png", False), + ("https://i.test.com/test.png", False), + ), +) def test_filter_domain(test_url: str, expected: bool, download_filter: DownloadFilter): result = download_filter._check_domain(test_url) assert result == expected -@pytest.mark.parametrize(('test_url', 'expected'), ( - ('test.mp4', False), - ('test.gif', True), - ('https://www.example.com/test.mp4', False), - 
('https://www.example.com/test.png', True), - ('http://reddit.com/test.mp4', False), - ('http://reddit.com/test.gif', False), -)) +@pytest.mark.parametrize( + ("test_url", "expected"), + ( + ("test.mp4", False), + ("test.gif", True), + ("https://www.example.com/test.mp4", False), + ("https://www.example.com/test.png", True), + ("http://reddit.com/test.mp4", False), + ("http://reddit.com/test.gif", False), + ), +) def test_filter_all(test_url: str, expected: bool, download_filter: DownloadFilter): test_resource = Resource(MagicMock(), test_url, lambda: None) result = download_filter.check_resource(test_resource) assert result == expected -@pytest.mark.parametrize('test_url', ( - 'test.mp3', - 'test.mp4', - 'http://reddit.com/test.mp4', - 't', -)) +@pytest.mark.parametrize( + "test_url", + ( + "test.mp3", + "test.mp4", + "http://reddit.com/test.mp4", + "t", + ), +) def test_filter_empty_filter(test_url: str): download_filter = DownloadFilter() test_resource = Resource(MagicMock(), test_url, lambda: None) diff --git a/tests/test_downloader.py b/tests/test_downloader.py index e92d870..7b81a85 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -18,7 +18,7 @@ from bdfr.downloader import RedditDownloader @pytest.fixture() def args() -> Configuration: args = Configuration() - args.time_format = 'ISO' + args.time_format = "ISO" return args @@ -32,29 +32,32 @@ def downloader_mock(args: Configuration): return downloader_mock -@pytest.mark.parametrize(('test_ids', 'test_excluded', 'expected_len'), ( - (('aaaaaa',), (), 1), - (('aaaaaa',), ('aaaaaa',), 0), - ((), ('aaaaaa',), 0), - (('aaaaaa', 'bbbbbb'), ('aaaaaa',), 1), - (('aaaaaa', 'bbbbbb', 'cccccc'), ('aaaaaa',), 2), -)) -@patch('bdfr.site_downloaders.download_factory.DownloadFactory.pull_lever') +@pytest.mark.parametrize( + ("test_ids", "test_excluded", "expected_len"), + ( + (("aaaaaa",), (), 1), + (("aaaaaa",), ("aaaaaa",), 0), + ((), ("aaaaaa",), 0), + (("aaaaaa", "bbbbbb"), ("aaaaaa",), 1), + (("aaaaaa", "bbbbbb", "cccccc"), ("aaaaaa",), 2), + ), +) +@patch("bdfr.site_downloaders.download_factory.DownloadFactory.pull_lever") def test_excluded_ids( - mock_function: MagicMock, - test_ids: tuple[str], - test_excluded: tuple[str], - expected_len: int, - downloader_mock: MagicMock, + mock_function: MagicMock, + test_ids: tuple[str], + test_excluded: tuple[str], + expected_len: int, + downloader_mock: MagicMock, ): downloader_mock.excluded_submission_ids = test_excluded mock_function.return_value = MagicMock() - mock_function.return_value.__name__ = 'test' + mock_function.return_value.__name__ = "test" test_submissions = [] for test_id in test_ids: m = MagicMock() m.id = test_id - m.subreddit.display_name.return_value = 'https://www.example.com/' + m.subreddit.display_name.return_value = "https://www.example.com/" m.__class__ = praw.models.Submission test_submissions.append(m) downloader_mock.reddit_lists = [test_submissions] @@ -65,32 +68,27 @@ def test_excluded_ids( @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize('test_submission_id', ( - 'm1hqw6', -)) +@pytest.mark.parametrize("test_submission_id", ("m1hqw6",)) def test_mark_hard_link( - test_submission_id: str, - downloader_mock: MagicMock, - tmp_path: Path, - reddit_instance: praw.Reddit + test_submission_id: str, downloader_mock: MagicMock, tmp_path: Path, reddit_instance: praw.Reddit ): downloader_mock.reddit_instance = reddit_instance downloader_mock.args.make_hard_links = True downloader_mock.download_directory = tmp_path - 
downloader_mock.args.folder_scheme = '' - downloader_mock.args.file_scheme = '{POSTID}' + downloader_mock.args.folder_scheme = "" + downloader_mock.args.file_scheme = "{POSTID}" downloader_mock.file_name_formatter = RedditConnector.create_file_name_formatter(downloader_mock) submission = downloader_mock.reddit_instance.submission(id=test_submission_id) - original = Path(tmp_path, f'{test_submission_id}.png') + original = Path(tmp_path, f"{test_submission_id}.png") RedditDownloader._download_submission(downloader_mock, submission) assert original.exists() - downloader_mock.args.file_scheme = 'test2_{POSTID}' + downloader_mock.args.file_scheme = "test2_{POSTID}" downloader_mock.file_name_formatter = RedditConnector.create_file_name_formatter(downloader_mock) RedditDownloader._download_submission(downloader_mock, submission) test_file_1_stats = original.stat() - test_file_2_inode = Path(tmp_path, f'test2_{test_submission_id}.png').stat().st_ino + test_file_2_inode = Path(tmp_path, f"test2_{test_submission_id}.png").stat().st_ino assert test_file_1_stats.st_nlink == 2 assert test_file_1_stats.st_ino == test_file_2_inode @@ -98,20 +96,18 @@ def test_mark_hard_link( @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_submission_id', 'test_creation_date'), ( - ('ndzz50', 1621204841.0), -)) +@pytest.mark.parametrize(("test_submission_id", "test_creation_date"), (("ndzz50", 1621204841.0),)) def test_file_creation_date( - test_submission_id: str, - test_creation_date: float, - downloader_mock: MagicMock, - tmp_path: Path, - reddit_instance: praw.Reddit + test_submission_id: str, + test_creation_date: float, + downloader_mock: MagicMock, + tmp_path: Path, + reddit_instance: praw.Reddit, ): downloader_mock.reddit_instance = reddit_instance downloader_mock.download_directory = tmp_path - downloader_mock.args.folder_scheme = '' - downloader_mock.args.file_scheme = '{POSTID}' + downloader_mock.args.folder_scheme = "" + downloader_mock.args.file_scheme = "{POSTID}" downloader_mock.file_name_formatter = RedditConnector.create_file_name_formatter(downloader_mock) submission = downloader_mock.reddit_instance.submission(id=test_submission_id) @@ -123,27 +119,25 @@ def test_file_creation_date( def test_search_existing_files(): - results = RedditDownloader.scan_existing_files(Path('.')) + results = RedditDownloader.scan_existing_files(Path(".")) assert len(results.keys()) != 0 @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_submission_id', 'test_hash'), ( - ('m1hqw6', 'a912af8905ae468e0121e9940f797ad7'), -)) +@pytest.mark.parametrize(("test_submission_id", "test_hash"), (("m1hqw6", "a912af8905ae468e0121e9940f797ad7"),)) def test_download_submission_hash_exists( - test_submission_id: str, - test_hash: str, - downloader_mock: MagicMock, - reddit_instance: praw.Reddit, - tmp_path: Path, - capsys: pytest.CaptureFixture + test_submission_id: str, + test_hash: str, + downloader_mock: MagicMock, + reddit_instance: praw.Reddit, + tmp_path: Path, + capsys: pytest.CaptureFixture, ): setup_logging(3) downloader_mock.reddit_instance = reddit_instance downloader_mock.download_filter.check_url.return_value = True - downloader_mock.args.folder_scheme = '' + downloader_mock.args.folder_scheme = "" downloader_mock.args.no_dupes = True downloader_mock.file_name_formatter = RedditConnector.create_file_name_formatter(downloader_mock) downloader_mock.download_directory = tmp_path @@ -153,47 +147,44 @@ def test_download_submission_hash_exists( folder_contents = list(tmp_path.iterdir()) 
output = capsys.readouterr() assert not folder_contents - assert re.search(r'Resource hash .*? downloaded elsewhere', output.out) + assert re.search(r"Resource hash .*? downloaded elsewhere", output.out) @pytest.mark.online @pytest.mark.reddit def test_download_submission_file_exists( - downloader_mock: MagicMock, - reddit_instance: praw.Reddit, - tmp_path: Path, - capsys: pytest.CaptureFixture + downloader_mock: MagicMock, reddit_instance: praw.Reddit, tmp_path: Path, capsys: pytest.CaptureFixture ): setup_logging(3) downloader_mock.reddit_instance = reddit_instance downloader_mock.download_filter.check_url.return_value = True - downloader_mock.args.folder_scheme = '' + downloader_mock.args.folder_scheme = "" downloader_mock.file_name_formatter = RedditConnector.create_file_name_formatter(downloader_mock) downloader_mock.download_directory = tmp_path - submission = downloader_mock.reddit_instance.submission(id='m1hqw6') - Path(tmp_path, 'Arneeman_Metagaming isn\'t always a bad thing_m1hqw6.png').touch() + submission = downloader_mock.reddit_instance.submission(id="m1hqw6") + Path(tmp_path, "Arneeman_Metagaming isn't always a bad thing_m1hqw6.png").touch() RedditDownloader._download_submission(downloader_mock, submission) folder_contents = list(tmp_path.iterdir()) output = capsys.readouterr() assert len(folder_contents) == 1 - assert 'Arneeman_Metagaming isn\'t always a bad thing_m1hqw6.png'\ - ' from submission m1hqw6 already exists' in output.out + assert ( + "Arneeman_Metagaming isn't always a bad thing_m1hqw6.png" " from submission m1hqw6 already exists" in output.out + ) @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_submission_id', 'expected_files_len'), ( - ('ljyy27', 4), -)) +@pytest.mark.parametrize(("test_submission_id", "expected_files_len"), (("ljyy27", 4),)) def test_download_submission( - test_submission_id: str, - expected_files_len: int, - downloader_mock: MagicMock, - reddit_instance: praw.Reddit, - tmp_path: Path): + test_submission_id: str, + expected_files_len: int, + downloader_mock: MagicMock, + reddit_instance: praw.Reddit, + tmp_path: Path, +): downloader_mock.reddit_instance = reddit_instance downloader_mock.download_filter.check_url.return_value = True - downloader_mock.args.folder_scheme = '' + downloader_mock.args.folder_scheme = "" downloader_mock.file_name_formatter = RedditConnector.create_file_name_formatter(downloader_mock) downloader_mock.download_directory = tmp_path submission = downloader_mock.reddit_instance.submission(id=test_submission_id) @@ -204,103 +195,95 @@ def test_download_submission( @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_submission_id', 'min_score'), ( - ('ljyy27', 1), -)) +@pytest.mark.parametrize(("test_submission_id", "min_score"), (("ljyy27", 1),)) def test_download_submission_min_score_above( - test_submission_id: str, - min_score: int, - downloader_mock: MagicMock, - reddit_instance: praw.Reddit, - tmp_path: Path, - capsys: pytest.CaptureFixture, + test_submission_id: str, + min_score: int, + downloader_mock: MagicMock, + reddit_instance: praw.Reddit, + tmp_path: Path, + capsys: pytest.CaptureFixture, ): setup_logging(3) downloader_mock.reddit_instance = reddit_instance downloader_mock.download_filter.check_url.return_value = True - downloader_mock.args.folder_scheme = '' + downloader_mock.args.folder_scheme = "" downloader_mock.args.min_score = min_score downloader_mock.file_name_formatter = RedditConnector.create_file_name_formatter(downloader_mock) 
downloader_mock.download_directory = tmp_path submission = downloader_mock.reddit_instance.submission(id=test_submission_id) RedditDownloader._download_submission(downloader_mock, submission) output = capsys.readouterr() - assert 'filtered due to score' not in output.out + assert "filtered due to score" not in output.out @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_submission_id', 'min_score'), ( - ('ljyy27', 25), -)) +@pytest.mark.parametrize(("test_submission_id", "min_score"), (("ljyy27", 25),)) def test_download_submission_min_score_below( - test_submission_id: str, - min_score: int, - downloader_mock: MagicMock, - reddit_instance: praw.Reddit, - tmp_path: Path, - capsys: pytest.CaptureFixture, + test_submission_id: str, + min_score: int, + downloader_mock: MagicMock, + reddit_instance: praw.Reddit, + tmp_path: Path, + capsys: pytest.CaptureFixture, ): setup_logging(3) downloader_mock.reddit_instance = reddit_instance downloader_mock.download_filter.check_url.return_value = True - downloader_mock.args.folder_scheme = '' + downloader_mock.args.folder_scheme = "" downloader_mock.args.min_score = min_score downloader_mock.file_name_formatter = RedditConnector.create_file_name_formatter(downloader_mock) downloader_mock.download_directory = tmp_path submission = downloader_mock.reddit_instance.submission(id=test_submission_id) RedditDownloader._download_submission(downloader_mock, submission) output = capsys.readouterr() - assert 'filtered due to score' in output.out + assert "filtered due to score" in output.out @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_submission_id', 'max_score'), ( - ('ljyy27', 25), -)) +@pytest.mark.parametrize(("test_submission_id", "max_score"), (("ljyy27", 25),)) def test_download_submission_max_score_below( - test_submission_id: str, - max_score: int, - downloader_mock: MagicMock, - reddit_instance: praw.Reddit, - tmp_path: Path, - capsys: pytest.CaptureFixture, + test_submission_id: str, + max_score: int, + downloader_mock: MagicMock, + reddit_instance: praw.Reddit, + tmp_path: Path, + capsys: pytest.CaptureFixture, ): setup_logging(3) downloader_mock.reddit_instance = reddit_instance downloader_mock.download_filter.check_url.return_value = True - downloader_mock.args.folder_scheme = '' + downloader_mock.args.folder_scheme = "" downloader_mock.args.max_score = max_score downloader_mock.file_name_formatter = RedditConnector.create_file_name_formatter(downloader_mock) downloader_mock.download_directory = tmp_path submission = downloader_mock.reddit_instance.submission(id=test_submission_id) RedditDownloader._download_submission(downloader_mock, submission) output = capsys.readouterr() - assert 'filtered due to score' not in output.out + assert "filtered due to score" not in output.out @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_submission_id', 'max_score'), ( - ('ljyy27', 1), -)) +@pytest.mark.parametrize(("test_submission_id", "max_score"), (("ljyy27", 1),)) def test_download_submission_max_score_above( - test_submission_id: str, - max_score: int, - downloader_mock: MagicMock, - reddit_instance: praw.Reddit, - tmp_path: Path, - capsys: pytest.CaptureFixture, + test_submission_id: str, + max_score: int, + downloader_mock: MagicMock, + reddit_instance: praw.Reddit, + tmp_path: Path, + capsys: pytest.CaptureFixture, ): setup_logging(3) downloader_mock.reddit_instance = reddit_instance downloader_mock.download_filter.check_url.return_value = True - downloader_mock.args.folder_scheme 
= '' + downloader_mock.args.folder_scheme = "" downloader_mock.args.max_score = max_score downloader_mock.file_name_formatter = RedditConnector.create_file_name_formatter(downloader_mock) downloader_mock.download_directory = tmp_path submission = downloader_mock.reddit_instance.submission(id=test_submission_id) RedditDownloader._download_submission(downloader_mock, submission) output = capsys.readouterr() - assert 'filtered due to score' in output.out + assert "filtered due to score" in output.out diff --git a/tests/test_file_name_formatter.py b/tests/test_file_name_formatter.py index 0492536..c04e07d 100644 --- a/tests/test_file_name_formatter.py +++ b/tests/test_file_name_formatter.py @@ -22,26 +22,26 @@ from bdfr.site_downloaders.self_post import SelfPost @pytest.fixture() def submission() -> MagicMock: test = MagicMock() - test.title = 'name' - test.subreddit.display_name = 'randomreddit' - test.author.name = 'person' - test.id = '12345' + test.title = "name" + test.subreddit.display_name = "randomreddit" + test.author.name = "person" + test.id = "12345" test.score = 1000 - test.link_flair_text = 'test_flair' + test.link_flair_text = "test_flair" test.created_utc = datetime(2021, 4, 21, 9, 30, 0).timestamp() test.__class__ = praw.models.Submission return test def do_test_string_equality(result: Union[Path, str], expected: str) -> bool: - if platform.system() == 'Windows': + if platform.system() == "Windows": expected = FileNameFormatter._format_for_windows(expected) return str(result).endswith(expected) def do_test_path_equality(result: Path, expected: str) -> bool: - if platform.system() == 'Windows': - expected = expected.split('/') + if platform.system() == "Windows": + expected = expected.split("/") expected = [FileNameFormatter._format_for_windows(part) for part in expected] expected = Path(*expected) else: @@ -49,35 +49,41 @@ def do_test_path_equality(result: Path, expected: str) -> bool: return str(result).endswith(str(expected)) -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def reddit_submission(reddit_instance: praw.Reddit) -> praw.models.Submission: - return reddit_instance.submission(id='w22m5l') + return reddit_instance.submission(id="w22m5l") -@pytest.mark.parametrize(('test_format_string', 'expected'), ( - ('{SUBREDDIT}', 'randomreddit'), - ('{REDDITOR}', 'person'), - ('{POSTID}', '12345'), - ('{UPVOTES}', '1000'), - ('{FLAIR}', 'test_flair'), - ('{DATE}', '2021-04-21T09:30:00'), - ('{REDDITOR}_{TITLE}_{POSTID}', 'person_name_12345'), -)) +@pytest.mark.parametrize( + ("test_format_string", "expected"), + ( + ("{SUBREDDIT}", "randomreddit"), + ("{REDDITOR}", "person"), + ("{POSTID}", "12345"), + ("{UPVOTES}", "1000"), + ("{FLAIR}", "test_flair"), + ("{DATE}", "2021-04-21T09:30:00"), + ("{REDDITOR}_{TITLE}_{POSTID}", "person_name_12345"), + ), +) def test_format_name_mock(test_format_string: str, expected: str, submission: MagicMock): - test_formatter = FileNameFormatter(test_format_string, '', 'ISO') + test_formatter = FileNameFormatter(test_format_string, "", "ISO") result = test_formatter._format_name(submission, test_format_string) assert do_test_string_equality(result, expected) -@pytest.mark.parametrize(('test_string', 'expected'), ( - ('', False), - ('test', False), - ('{POSTID}', True), - ('POSTID', False), - ('{POSTID}_test', True), - ('test_{TITLE}', True), - ('TITLE_POSTID', False), -)) +@pytest.mark.parametrize( + ("test_string", "expected"), + ( + ("", False), + ("test", False), + ("{POSTID}", True), + ("POSTID", False), + 
("{POSTID}_test", True), + ("test_{TITLE}", True), + ("TITLE_POSTID", False), + ), +) def test_check_format_string_validity(test_string: str, expected: bool): result = FileNameFormatter.validate_string(test_string) assert result == expected @@ -85,84 +91,98 @@ def test_check_format_string_validity(test_string: str, expected: bool): @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_format_string', 'expected'), ( - ('{SUBREDDIT}', 'formula1'), - ('{REDDITOR}', 'Kirsty-Blue'), - ('{POSTID}', 'w22m5l'), - ('{FLAIR}', 'Social Media rall'), - ('{SUBREDDIT}_{TITLE}', 'formula1_George Russel acknowledges the Twitter trend about him'), - ('{REDDITOR}_{TITLE}_{POSTID}', 'Kirsty-Blue_George Russel acknowledges the Twitter trend about him_w22m5l') -)) +@pytest.mark.parametrize( + ("test_format_string", "expected"), + ( + ("{SUBREDDIT}", "formula1"), + ("{REDDITOR}", "Kirsty-Blue"), + ("{POSTID}", "w22m5l"), + ("{FLAIR}", "Social Media rall"), + ("{SUBREDDIT}_{TITLE}", "formula1_George Russel acknowledges the Twitter trend about him"), + ("{REDDITOR}_{TITLE}_{POSTID}", "Kirsty-Blue_George Russel acknowledges the Twitter trend about him_w22m5l"), + ), +) def test_format_name_real(test_format_string: str, expected: str, reddit_submission: praw.models.Submission): - test_formatter = FileNameFormatter(test_format_string, '', '') + test_formatter = FileNameFormatter(test_format_string, "", "") result = test_formatter._format_name(reddit_submission, test_format_string) assert do_test_string_equality(result, expected) @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('format_string_directory', 'format_string_file', 'expected'), ( +@pytest.mark.parametrize( + ("format_string_directory", "format_string_file", "expected"), ( - '{SUBREDDIT}', - '{POSTID}', - 'test/formula1/w22m5l.png', + ( + "{SUBREDDIT}", + "{POSTID}", + "test/formula1/w22m5l.png", + ), + ( + "{SUBREDDIT}", + "{TITLE}_{POSTID}", + "test/formula1/George Russel acknowledges the Twitter trend about him_w22m5l.png", + ), + ( + "{SUBREDDIT}", + "{REDDITOR}_{TITLE}_{POSTID}", + "test/formula1/Kirsty-Blue_George Russel acknowledges the Twitter trend about him_w22m5l.png", + ), ), - ( - '{SUBREDDIT}', - '{TITLE}_{POSTID}', - 'test/formula1/George Russel acknowledges the Twitter trend about him_w22m5l.png', - ), - ( - '{SUBREDDIT}', - '{REDDITOR}_{TITLE}_{POSTID}', - 'test/formula1/Kirsty-Blue_George Russel acknowledges the Twitter trend about him_w22m5l.png', - ), -)) +) def test_format_full( - format_string_directory: str, - format_string_file: str, - expected: str, - reddit_submission: praw.models.Submission): - test_resource = Resource(reddit_submission, 'i.reddit.com/blabla.png', lambda: None) - test_formatter = FileNameFormatter(format_string_file, format_string_directory, 'ISO') - result = test_formatter.format_path(test_resource, Path('test')) + format_string_directory: str, format_string_file: str, expected: str, reddit_submission: praw.models.Submission +): + test_resource = Resource(reddit_submission, "i.reddit.com/blabla.png", lambda: None) + test_formatter = FileNameFormatter(format_string_file, format_string_directory, "ISO") + result = test_formatter.format_path(test_resource, Path("test")) assert do_test_path_equality(result, expected) @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('format_string_directory', 'format_string_file'), ( - ('{SUBREDDIT}', '{POSTID}'), - ('{SUBREDDIT}', '{UPVOTES}'), - ('{SUBREDDIT}', '{UPVOTES}{POSTID}'), -)) +@pytest.mark.parametrize( + 
("format_string_directory", "format_string_file"), + ( + ("{SUBREDDIT}", "{POSTID}"), + ("{SUBREDDIT}", "{UPVOTES}"), + ("{SUBREDDIT}", "{UPVOTES}{POSTID}"), + ), +) def test_format_full_conform( - format_string_directory: str, - format_string_file: str, - reddit_submission: praw.models.Submission): - test_resource = Resource(reddit_submission, 'i.reddit.com/blabla.png', lambda: None) - test_formatter = FileNameFormatter(format_string_file, format_string_directory, 'ISO') - test_formatter.format_path(test_resource, Path('test')) + format_string_directory: str, format_string_file: str, reddit_submission: praw.models.Submission +): + test_resource = Resource(reddit_submission, "i.reddit.com/blabla.png", lambda: None) + test_formatter = FileNameFormatter(format_string_file, format_string_directory, "ISO") + test_formatter.format_path(test_resource, Path("test")) @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('format_string_directory', 'format_string_file', 'index', 'expected'), ( - ('{SUBREDDIT}', '{POSTID}', None, 'test/formula1/w22m5l.png'), - ('{SUBREDDIT}', '{POSTID}', 1, 'test/formula1/w22m5l_1.png'), - ('{SUBREDDIT}', '{POSTID}', 2, 'test/formula1/w22m5l_2.png'), - ('{SUBREDDIT}', '{TITLE}_{POSTID}', 2, 'test/formula1/George Russel acknowledges the Twitter trend about him_w22m5l_2.png'), -)) +@pytest.mark.parametrize( + ("format_string_directory", "format_string_file", "index", "expected"), + ( + ("{SUBREDDIT}", "{POSTID}", None, "test/formula1/w22m5l.png"), + ("{SUBREDDIT}", "{POSTID}", 1, "test/formula1/w22m5l_1.png"), + ("{SUBREDDIT}", "{POSTID}", 2, "test/formula1/w22m5l_2.png"), + ( + "{SUBREDDIT}", + "{TITLE}_{POSTID}", + 2, + "test/formula1/George Russel acknowledges the Twitter trend about him_w22m5l_2.png", + ), + ), +) def test_format_full_with_index_suffix( - format_string_directory: str, - format_string_file: str, - index: Optional[int], - expected: str, - reddit_submission: praw.models.Submission, + format_string_directory: str, + format_string_file: str, + index: Optional[int], + expected: str, + reddit_submission: praw.models.Submission, ): - test_resource = Resource(reddit_submission, 'i.reddit.com/blabla.png', lambda: None) - test_formatter = FileNameFormatter(format_string_file, format_string_directory, 'ISO') - result = test_formatter.format_path(test_resource, Path('test'), index) + test_resource = Resource(reddit_submission, "i.reddit.com/blabla.png", lambda: None) + test_formatter = FileNameFormatter(format_string_file, format_string_directory, "ISO") + result = test_formatter.format_path(test_resource, Path("test"), index) assert do_test_path_equality(result, expected) @@ -170,99 +190,114 @@ def test_format_multiple_resources(): mocks = [] for i in range(1, 5): new_mock = MagicMock() - new_mock.url = 'https://example.com/test.png' - new_mock.extension = '.png' - new_mock.source_submission.title = 'test' + new_mock.url = "https://example.com/test.png" + new_mock.extension = ".png" + new_mock.source_submission.title = "test" new_mock.source_submission.__class__ = praw.models.Submission mocks.append(new_mock) - test_formatter = FileNameFormatter('{TITLE}', '', 'ISO') - results = test_formatter.format_resource_paths(mocks, Path('.')) + test_formatter = FileNameFormatter("{TITLE}", "", "ISO") + results = test_formatter.format_resource_paths(mocks, Path(".")) results = set([str(res[0].name) for res in results]) - expected = {'test_1.png', 'test_2.png', 'test_3.png', 'test_4.png'} + expected = {"test_1.png", "test_2.png", "test_3.png", "test_4.png"} 
assert results == expected -@pytest.mark.parametrize(('test_filename', 'test_ending'), ( - ('A' * 300, '.png'), - ('A' * 300, '_1.png'), - ('a' * 300, '_1000.jpeg'), - ('😍💕✨' * 100, '_1.png'), -)) +@pytest.mark.parametrize( + ("test_filename", "test_ending"), + ( + ("A" * 300, ".png"), + ("A" * 300, "_1.png"), + ("a" * 300, "_1000.jpeg"), + ("😍💕✨" * 100, "_1.png"), + ), +) def test_limit_filename_length(test_filename: str, test_ending: str): - result = FileNameFormatter.limit_file_name_length(test_filename, test_ending, Path('.')) + result = FileNameFormatter.limit_file_name_length(test_filename, test_ending, Path(".")) assert len(result.name) <= 255 - assert len(result.name.encode('utf-8')) <= 255 + assert len(result.name.encode("utf-8")) <= 255 assert len(str(result)) <= FileNameFormatter.find_max_path_length() assert isinstance(result, Path) -@pytest.mark.parametrize(('test_filename', 'test_ending', 'expected_end'), ( - ('test_aaaaaa', '_1.png', 'test_aaaaaa_1.png'), - ('test_aataaa', '_1.png', 'test_aataaa_1.png'), - ('test_abcdef', '_1.png', 'test_abcdef_1.png'), - ('test_aaaaaa', '.png', 'test_aaaaaa.png'), - ('test', '_1.png', 'test_1.png'), - ('test_m1hqw6', '_1.png', 'test_m1hqw6_1.png'), - ('A' * 300 + '_bbbccc', '.png', '_bbbccc.png'), - ('A' * 300 + '_bbbccc', '_1000.jpeg', '_bbbccc_1000.jpeg'), - ('😍💕✨' * 100 + '_aaa1aa', '_1.png', '_aaa1aa_1.png'), -)) +@pytest.mark.parametrize( + ("test_filename", "test_ending", "expected_end"), + ( + ("test_aaaaaa", "_1.png", "test_aaaaaa_1.png"), + ("test_aataaa", "_1.png", "test_aataaa_1.png"), + ("test_abcdef", "_1.png", "test_abcdef_1.png"), + ("test_aaaaaa", ".png", "test_aaaaaa.png"), + ("test", "_1.png", "test_1.png"), + ("test_m1hqw6", "_1.png", "test_m1hqw6_1.png"), + ("A" * 300 + "_bbbccc", ".png", "_bbbccc.png"), + ("A" * 300 + "_bbbccc", "_1000.jpeg", "_bbbccc_1000.jpeg"), + ("😍💕✨" * 100 + "_aaa1aa", "_1.png", "_aaa1aa_1.png"), + ), +) def test_preserve_id_append_when_shortening(test_filename: str, test_ending: str, expected_end: str): - result = FileNameFormatter.limit_file_name_length(test_filename, test_ending, Path('.')) + result = FileNameFormatter.limit_file_name_length(test_filename, test_ending, Path(".")) assert len(result.name) <= 255 - assert len(result.name.encode('utf-8')) <= 255 + assert len(result.name.encode("utf-8")) <= 255 assert result.name.endswith(expected_end) assert len(str(result)) <= FileNameFormatter.find_max_path_length() -@pytest.mark.skipif(sys.platform == 'win32', reason='Test broken on windows github') +@pytest.mark.skipif(sys.platform == "win32", reason="Test broken on windows github") def test_shorten_filename_real(submission: MagicMock, tmp_path: Path): - submission.title = 'A' * 500 - submission.author.name = 'test' - submission.subreddit.display_name = 'test' - submission.id = 'BBBBBB' - test_resource = Resource(submission, 'www.example.com/empty', lambda: None, '.jpeg') - test_formatter = FileNameFormatter('{REDDITOR}_{TITLE}_{POSTID}', '{SUBREDDIT}', 'ISO') + submission.title = "A" * 500 + submission.author.name = "test" + submission.subreddit.display_name = "test" + submission.id = "BBBBBB" + test_resource = Resource(submission, "www.example.com/empty", lambda: None, ".jpeg") + test_formatter = FileNameFormatter("{REDDITOR}_{TITLE}_{POSTID}", "{SUBREDDIT}", "ISO") result = test_formatter.format_path(test_resource, tmp_path) result.parent.mkdir(parents=True) result.touch() -@pytest.mark.parametrize(('test_name', 'test_ending'), ( - ('a', 'b'), - ('a', '_bbbbbb.jpg'), - ('a' * 20, 
'_bbbbbb.jpg'), - ('a' * 50, '_bbbbbb.jpg'), - ('a' * 500, '_bbbbbb.jpg'), -)) +@pytest.mark.parametrize( + ("test_name", "test_ending"), + ( + ("a", "b"), + ("a", "_bbbbbb.jpg"), + ("a" * 20, "_bbbbbb.jpg"), + ("a" * 50, "_bbbbbb.jpg"), + ("a" * 500, "_bbbbbb.jpg"), + ), +) def test_shorten_path(test_name: str, test_ending: str, tmp_path: Path): result = FileNameFormatter.limit_file_name_length(test_name, test_ending, tmp_path) assert len(str(result.name)) <= 255 - assert len(str(result.name).encode('UTF-8')) <= 255 - assert len(str(result.name).encode('cp1252')) <= 255 + assert len(str(result.name).encode("UTF-8")) <= 255 + assert len(str(result.name).encode("cp1252")) <= 255 assert len(str(result)) <= FileNameFormatter.find_max_path_length() -@pytest.mark.parametrize(('test_string', 'expected'), ( - ('test', 'test'), - ('test😍', 'test'), - ('test.png', 'test.png'), - ('test*', 'test'), - ('test**', 'test'), - ('test?*', 'test'), - ('test_???.png', 'test_.png'), - ('test_???😍.png', 'test_.png'), -)) +@pytest.mark.parametrize( + ("test_string", "expected"), + ( + ("test", "test"), + ("test😍", "test"), + ("test.png", "test.png"), + ("test*", "test"), + ("test**", "test"), + ("test?*", "test"), + ("test_???.png", "test_.png"), + ("test_???😍.png", "test_.png"), + ), +) def test_format_file_name_for_windows(test_string: str, expected: str): result = FileNameFormatter._format_for_windows(test_string) assert result == expected -@pytest.mark.parametrize(('test_string', 'expected'), ( - ('test', 'test'), - ('test😍', 'test'), - ('😍', ''), -)) +@pytest.mark.parametrize( + ("test_string", "expected"), + ( + ("test", "test"), + ("test😍", "test"), + ("😍", ""), + ), +) def test_strip_emojies(test_string: str, expected: str): result = FileNameFormatter._strip_emojis(test_string) assert result == expected @@ -270,121 +305,151 @@ def test_strip_emojies(test_string: str, expected: str): @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_submission_id', 'expected'), ( - ('mfuteh', { - 'title': 'Why Do Interviewers Ask Linked List Questions?', - 'redditor': 'mjgardner', - }), -)) +@pytest.mark.parametrize( + ("test_submission_id", "expected"), + ( + ( + "mfuteh", + { + "title": "Why Do Interviewers Ask Linked List Questions?", + "redditor": "mjgardner", + }, + ), + ), +) def test_generate_dict_for_submission(test_submission_id: str, expected: dict, reddit_instance: praw.Reddit): test_submission = reddit_instance.submission(id=test_submission_id) - test_formatter = FileNameFormatter('{TITLE}', '', 'ISO') + test_formatter = FileNameFormatter("{TITLE}", "", "ISO") result = test_formatter._generate_name_dict_from_submission(test_submission) assert all([result.get(key) == expected[key] for key in expected.keys()]) @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_comment_id', 'expected'), ( - ('gsq0yuw', { - 'title': 'Why Do Interviewers Ask Linked List Questions?', - 'redditor': 'Doctor-Dapper', - 'postid': 'gsq0yuw', - 'flair': '', - }), -)) +@pytest.mark.parametrize( + ("test_comment_id", "expected"), + ( + ( + "gsq0yuw", + { + "title": "Why Do Interviewers Ask Linked List Questions?", + "redditor": "Doctor-Dapper", + "postid": "gsq0yuw", + "flair": "", + }, + ), + ), +) def test_generate_dict_for_comment(test_comment_id: str, expected: dict, reddit_instance: praw.Reddit): test_comment = reddit_instance.comment(id=test_comment_id) - test_formatter = FileNameFormatter('{TITLE}', '', 'ISO') + test_formatter = FileNameFormatter("{TITLE}", "", "ISO") result = 
test_formatter._generate_name_dict_from_comment(test_comment) assert all([result.get(key) == expected[key] for key in expected.keys()]) @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_file_scheme', 'test_folder_scheme', 'test_comment_id', 'expected_name'), ( - ('{POSTID}', '', 'gsoubde', 'gsoubde.json'), - ('{REDDITOR}_{POSTID}', '', 'gsoubde', 'DELETED_gsoubde.json'), -)) +@pytest.mark.parametrize( + ("test_file_scheme", "test_folder_scheme", "test_comment_id", "expected_name"), + ( + ("{POSTID}", "", "gsoubde", "gsoubde.json"), + ("{REDDITOR}_{POSTID}", "", "gsoubde", "DELETED_gsoubde.json"), + ), +) def test_format_archive_entry_comment( - test_file_scheme: str, - test_folder_scheme: str, - test_comment_id: str, - expected_name: str, - tmp_path: Path, - reddit_instance: praw.Reddit, + test_file_scheme: str, + test_folder_scheme: str, + test_comment_id: str, + expected_name: str, + tmp_path: Path, + reddit_instance: praw.Reddit, ): test_comment = reddit_instance.comment(id=test_comment_id) - test_formatter = FileNameFormatter(test_file_scheme, test_folder_scheme, 'ISO') - test_entry = Resource(test_comment, '', lambda: None, '.json') + test_formatter = FileNameFormatter(test_file_scheme, test_folder_scheme, "ISO") + test_entry = Resource(test_comment, "", lambda: None, ".json") result = test_formatter.format_path(test_entry, tmp_path) assert do_test_string_equality(result, expected_name) -@pytest.mark.parametrize(('test_folder_scheme', 'expected'), ( - ('{REDDITOR}/{SUBREDDIT}', 'person/randomreddit'), - ('{POSTID}/{SUBREDDIT}/{REDDITOR}', '12345/randomreddit/person'), -)) +@pytest.mark.parametrize( + ("test_folder_scheme", "expected"), + ( + ("{REDDITOR}/{SUBREDDIT}", "person/randomreddit"), + ("{POSTID}/{SUBREDDIT}/{REDDITOR}", "12345/randomreddit/person"), + ), +) def test_multilevel_folder_scheme( - test_folder_scheme: str, - expected: str, - tmp_path: Path, - submission: MagicMock, + test_folder_scheme: str, + expected: str, + tmp_path: Path, + submission: MagicMock, ): - test_formatter = FileNameFormatter('{POSTID}', test_folder_scheme, 'ISO') + test_formatter = FileNameFormatter("{POSTID}", test_folder_scheme, "ISO") test_resource = MagicMock() test_resource.source_submission = submission - test_resource.extension = '.png' + test_resource.extension = ".png" result = test_formatter.format_path(test_resource, tmp_path) result = result.relative_to(tmp_path) assert do_test_path_equality(result.parent, expected) - assert len(result.parents) == (len(expected.split('/')) + 1) + assert len(result.parents) == (len(expected.split("/")) + 1) -@pytest.mark.parametrize(('test_name_string', 'expected'), ( - ('test', 'test'), - ('😍', '😍'), - ('test😍', 'test😍'), - ('test😍 ’', 'test😍 ’'), - ('test😍 \\u2019', 'test😍 ’'), - ('Using that real good [1\\4]', 'Using that real good [1\\4]'), -)) +@pytest.mark.parametrize( + ("test_name_string", "expected"), + ( + ("test", "test"), + ("😍", "😍"), + ("test😍", "test😍"), + ("test😍 ’", "test😍 ’"), + ("test😍 \\u2019", "test😍 ’"), + ("Using that real good [1\\4]", "Using that real good [1\\4]"), + ), +) def test_preserve_emojis(test_name_string: str, expected: str, submission: MagicMock): submission.title = test_name_string - test_formatter = FileNameFormatter('{TITLE}', '', 'ISO') - result = test_formatter._format_name(submission, '{TITLE}') + test_formatter = FileNameFormatter("{TITLE}", "", "ISO") + result = test_formatter._format_name(submission, "{TITLE}") assert do_test_string_equality(result, expected) 
-@pytest.mark.parametrize(('test_string', 'expected'), ( - ('test \\u2019', 'test ’'), - ('My cat\\u2019s paws are so cute', 'My cat’s paws are so cute'), -)) +@pytest.mark.parametrize( + ("test_string", "expected"), + ( + ("test \\u2019", "test ’"), + ("My cat\\u2019s paws are so cute", "My cat’s paws are so cute"), + ), +) def test_convert_unicode_escapes(test_string: str, expected: str): result = FileNameFormatter._convert_unicode_escapes(test_string) assert result == expected -@pytest.mark.parametrize(('test_datetime', 'expected'), ( - (datetime(2020, 1, 1, 8, 0, 0), '2020-01-01T08:00:00'), - (datetime(2020, 1, 1, 8, 0), '2020-01-01T08:00:00'), - (datetime(2021, 4, 21, 8, 30, 21), '2021-04-21T08:30:21'), -)) +@pytest.mark.parametrize( + ("test_datetime", "expected"), + ( + (datetime(2020, 1, 1, 8, 0, 0), "2020-01-01T08:00:00"), + (datetime(2020, 1, 1, 8, 0), "2020-01-01T08:00:00"), + (datetime(2021, 4, 21, 8, 30, 21), "2021-04-21T08:30:21"), + ), +) def test_convert_timestamp(test_datetime: datetime, expected: str): test_timestamp = test_datetime.timestamp() - test_formatter = FileNameFormatter('{POSTID}', '', 'ISO') + test_formatter = FileNameFormatter("{POSTID}", "", "ISO") result = test_formatter._convert_timestamp(test_timestamp) assert result == expected -@pytest.mark.parametrize(('test_time_format', 'expected'), ( - ('ISO', '2021-05-02T13:33:00'), - ('%Y_%m', '2021_05'), - ('%Y-%m-%d', '2021-05-02'), -)) +@pytest.mark.parametrize( + ("test_time_format", "expected"), + ( + ("ISO", "2021-05-02T13:33:00"), + ("%Y_%m", "2021_05"), + ("%Y-%m-%d", "2021-05-02"), + ), +) def test_time_string_formats(test_time_format: str, expected: str): test_time = datetime(2021, 5, 2, 13, 33) - test_formatter = FileNameFormatter('{TITLE}', '', test_time_format) + test_formatter = FileNameFormatter("{TITLE}", "", test_time_format) result = test_formatter._convert_timestamp(test_time.timestamp()) assert result == expected @@ -395,29 +460,32 @@ def test_get_max_path_length(): def test_windows_max_path(tmp_path: Path): - with unittest.mock.patch('platform.system', return_value='Windows'): - with unittest.mock.patch('bdfr.file_name_formatter.FileNameFormatter.find_max_path_length', return_value=260): - result = FileNameFormatter.limit_file_name_length('test' * 100, '_1.png', tmp_path) + with unittest.mock.patch("platform.system", return_value="Windows"): + with unittest.mock.patch("bdfr.file_name_formatter.FileNameFormatter.find_max_path_length", return_value=260): + result = FileNameFormatter.limit_file_name_length("test" * 100, "_1.png", tmp_path) assert len(str(result)) <= 260 assert len(result.name) <= (260 - len(str(tmp_path))) @pytest.mark.online @pytest.mark.reddit -@pytest.mark.parametrize(('test_reddit_id', 'test_downloader', 'expected_names'), ( - ('gphmnr', YtdlpFallback, {'He has a lot to say today.mp4'}), - ('d0oir2', YtdlpFallback, {"Crunk's finest moment. Welcome to the new subreddit!.mp4"}), - ('jiecu', SelfPost, {'[deleted by user].txt'}), -)) +@pytest.mark.parametrize( + ("test_reddit_id", "test_downloader", "expected_names"), + ( + ("gphmnr", YtdlpFallback, {"He has a lot to say today.mp4"}), + ("d0oir2", YtdlpFallback, {"Crunk's finest moment. 
Welcome to the new subreddit!.mp4"}), + ("jiecu", SelfPost, {"[deleted by user].txt"}), + ), +) def test_name_submission( - test_reddit_id: str, - test_downloader: Type[BaseDownloader], - expected_names: set[str], - reddit_instance: praw.reddit.Reddit, + test_reddit_id: str, + test_downloader: Type[BaseDownloader], + expected_names: set[str], + reddit_instance: praw.reddit.Reddit, ): test_submission = reddit_instance.submission(id=test_reddit_id) test_resources = test_downloader(test_submission).find_resources() - test_formatter = FileNameFormatter('{TITLE}', '', '') - results = test_formatter.format_resource_paths(test_resources, Path('.')) + test_formatter = FileNameFormatter("{TITLE}", "", "") + results = test_formatter.format_resource_paths(test_resources, Path(".")) results = set([r[0].name for r in results]) assert results == expected_names diff --git a/tests/test_oauth2.py b/tests/test_oauth2.py index 71bdca1..3014c37 100644 --- a/tests/test_oauth2.py +++ b/tests/test_oauth2.py @@ -14,38 +14,58 @@ from bdfr.oauth2 import OAuth2Authenticator, OAuth2TokenManager @pytest.fixture() def example_config() -> configparser.ConfigParser: out = configparser.ConfigParser() - config_dict = {'DEFAULT': {'user_token': 'example'}} + config_dict = {"DEFAULT": {"user_token": "example"}} out.read_dict(config_dict) return out @pytest.mark.online -@pytest.mark.parametrize('test_scopes', ( - {'history', }, - {'history', 'creddits'}, - {'account', 'flair'}, - {'*', }, -)) +@pytest.mark.parametrize( + "test_scopes", + ( + { + "history", + }, + {"history", "creddits"}, + {"account", "flair"}, + { + "*", + }, + ), +) def test_check_scopes(test_scopes: set[str]): OAuth2Authenticator._check_scopes(test_scopes) -@pytest.mark.parametrize(('test_scopes', 'expected'), ( - ('history', {'history', }), - ('history creddits', {'history', 'creddits'}), - ('history, creddits, account', {'history', 'creddits', 'account'}), - ('history,creddits,account,flair', {'history', 'creddits', 'account', 'flair'}), -)) +@pytest.mark.parametrize( + ("test_scopes", "expected"), + ( + ( + "history", + { + "history", + }, + ), + ("history creddits", {"history", "creddits"}), + ("history, creddits, account", {"history", "creddits", "account"}), + ("history,creddits,account,flair", {"history", "creddits", "account", "flair"}), + ), +) def test_split_scopes(test_scopes: str, expected: set[str]): result = OAuth2Authenticator.split_scopes(test_scopes) assert result == expected @pytest.mark.online -@pytest.mark.parametrize('test_scopes', ( - {'random', }, - {'scope', 'another_scope'}, -)) +@pytest.mark.parametrize( + "test_scopes", + ( + { + "random", + }, + {"scope", "another_scope"}, + ), +) def test_check_scopes_bad(test_scopes: set[str]): with pytest.raises(BulkDownloaderException): OAuth2Authenticator._check_scopes(test_scopes) @@ -56,16 +76,16 @@ def test_token_manager_read(example_config: configparser.ConfigParser): mock_authoriser.refresh_token = None test_manager = OAuth2TokenManager(example_config, MagicMock()) test_manager.pre_refresh_callback(mock_authoriser) - assert mock_authoriser.refresh_token == example_config.get('DEFAULT', 'user_token') + assert mock_authoriser.refresh_token == example_config.get("DEFAULT", "user_token") def test_token_manager_write(example_config: configparser.ConfigParser, tmp_path: Path): - test_path = tmp_path / 'test.cfg' + test_path = tmp_path / "test.cfg" mock_authoriser = MagicMock() - mock_authoriser.refresh_token = 'changed_token' + mock_authoriser.refresh_token = "changed_token" test_manager = 
OAuth2TokenManager(example_config, test_path) test_manager.post_refresh_callback(mock_authoriser) - assert example_config.get('DEFAULT', 'user_token') == 'changed_token' - with test_path.open('r') as file: + assert example_config.get("DEFAULT", "user_token") == "changed_token" + with test_path.open("r") as file: file_contents = file.read() - assert 'user_token = changed_token' in file_contents + assert "user_token = changed_token" in file_contents diff --git a/tests/test_resource.py b/tests/test_resource.py index f3bbc9a..146d9a0 100644 --- a/tests/test_resource.py +++ b/tests/test_resource.py @@ -8,18 +8,21 @@ import pytest from bdfr.resource import Resource -@pytest.mark.parametrize(('test_url', 'expected'), ( - ('test.png', '.png'), - ('another.mp4', '.mp4'), - ('test.jpeg', '.jpeg'), - ('http://www.random.com/resource.png', '.png'), - ('https://www.resource.com/test/example.jpg', '.jpg'), - ('hard.png.mp4', '.mp4'), - ('https://preview.redd.it/7zkmr1wqqih61.png?width=237&format=png&auto=webp&s=19de214e634cbcad99', '.png'), - ('test.jpg#test', '.jpg'), - ('test.jpg?width=247#test', '.jpg'), - ('https://www.test.com/test/test2/example.png?random=test#thing', '.png'), -)) +@pytest.mark.parametrize( + ("test_url", "expected"), + ( + ("test.png", ".png"), + ("another.mp4", ".mp4"), + ("test.jpeg", ".jpeg"), + ("http://www.random.com/resource.png", ".png"), + ("https://www.resource.com/test/example.jpg", ".jpg"), + ("hard.png.mp4", ".mp4"), + ("https://preview.redd.it/7zkmr1wqqih61.png?width=237&format=png&auto=webp&s=19de214e634cbcad99", ".png"), + ("test.jpg#test", ".jpg"), + ("test.jpg?width=247#test", ".jpg"), + ("https://www.test.com/test/test2/example.png?random=test#thing", ".png"), + ), +) def test_resource_get_extension(test_url: str, expected: str): test_resource = Resource(MagicMock(), test_url, lambda: None) result = test_resource._determine_extension() @@ -27,9 +30,10 @@ def test_resource_get_extension(test_url: str, expected: str): @pytest.mark.online -@pytest.mark.parametrize(('test_url', 'expected_hash'), ( - ('https://www.iana.org/_img/2013.1/iana-logo-header.svg', '426b3ac01d3584c820f3b7f5985d6623'), -)) +@pytest.mark.parametrize( + ("test_url", "expected_hash"), + (("https://www.iana.org/_img/2013.1/iana-logo-header.svg", "426b3ac01d3584c820f3b7f5985d6623"),), +) def test_download_online_resource(test_url: str, expected_hash: str): test_resource = Resource(MagicMock(), test_url, Resource.retry_download(test_url)) test_resource.download() From 002a2dac4387fd936a706d386efcc6bce1ba69f3 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 3 Dec 2022 15:20:03 +1000 Subject: [PATCH 28/76] Add line length to isort config --- tox.ini | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index e5dce99..a5b8495 100644 --- a/tox.ini +++ b/tox.ini @@ -13,4 +13,5 @@ commands = [isort] profile = black -multi_line_output = 3 \ No newline at end of file +multi_line_output = 3 +line_length = 120 From c4f636c388676314821b3449d875c3ff9b4c9acb Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 3 Dec 2022 15:20:32 +1000 Subject: [PATCH 29/76] Fix import formatting --- bdfr/site_downloaders/fallback_downloaders/ytdlp_fallback.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bdfr/site_downloaders/fallback_downloaders/ytdlp_fallback.py b/bdfr/site_downloaders/fallback_downloaders/ytdlp_fallback.py index 6109b7a..900c8e9 100644 --- a/bdfr/site_downloaders/fallback_downloaders/ytdlp_fallback.py +++ 
b/bdfr/site_downloaders/fallback_downloaders/ytdlp_fallback.py @@ -9,9 +9,7 @@ from praw.models import Submission from bdfr.exceptions import NotADownloadableLinkError from bdfr.resource import Resource from bdfr.site_authenticator import SiteAuthenticator -from bdfr.site_downloaders.fallback_downloaders.fallback_downloader import ( - BaseFallbackDownloader, -) +from bdfr.site_downloaders.fallback_downloaders.fallback_downloader import BaseFallbackDownloader from bdfr.site_downloaders.youtube import Youtube logger = logging.getLogger(__name__) From 82230a97bcc21f69ffa03a0a0183b55aa952f9e1 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 3 Dec 2022 15:28:53 +1000 Subject: [PATCH 30/76] Add formatting check option --- tox.ini | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tox.ini b/tox.ini index a5b8495..672927b 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,7 @@ [tox] envlist = format + format_check [testenv:format] deps = @@ -11,6 +12,15 @@ commands = isort bdfr tests black bdfr tests --line-length 120 +[testenv:format_check] +deps = + isort + black +skip_install = True +commands = + isort bdfr tests --check + black bdfr tests --line-length 120 --check + [isort] profile = black multi_line_output = 3 From 9c3c5436b57a6528f283efbd67777f3b5e0650b2 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 3 Dec 2022 15:49:41 +1000 Subject: [PATCH 31/76] Add formatting check option for code --- .github/workflows/formatting_check.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .github/workflows/formatting_check.yml diff --git a/.github/workflows/formatting_check.yml b/.github/workflows/formatting_check.yml new file mode 100644 index 0000000..498d6af --- /dev/null +++ b/.github/workflows/formatting_check.yml @@ -0,0 +1,11 @@ +name: formatting_check +run-name: Check code formatting +on: pull_request +jobs: + formatting_check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: paolorechia/pox@v1.0.1 + with: + tox_env: "format_check" \ No newline at end of file From ee095d4814a2a8459e9edc41295c2df1fa63d10e Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 3 Dec 2022 15:50:35 +1000 Subject: [PATCH 32/76] Expand action scope --- .github/workflows/formatting_check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/formatting_check.yml b/.github/workflows/formatting_check.yml index 498d6af..8a04fc2 100644 --- a/.github/workflows/formatting_check.yml +++ b/.github/workflows/formatting_check.yml @@ -1,6 +1,6 @@ name: formatting_check run-name: Check code formatting -on: pull_request +on: [push, pull_request] jobs: formatting_check: runs-on: ubuntu-latest From 5427ceb29a7ba97f1507935937b4271bd80b5cb9 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 3 Dec 2022 16:34:23 +1000 Subject: [PATCH 33/76] Add markdown linter to format check --- .github/workflows/formatting_check.yml | 1 + .markdown_style.rb | 3 +++ tox.ini | 2 ++ 3 files changed, 6 insertions(+) create mode 100644 .markdown_style.rb diff --git a/.github/workflows/formatting_check.yml b/.github/workflows/formatting_check.yml index 8a04fc2..4941d83 100644 --- a/.github/workflows/formatting_check.yml +++ b/.github/workflows/formatting_check.yml @@ -7,5 +7,6 @@ jobs: steps: - uses: actions/checkout@v3 - uses: paolorechia/pox@v1.0.1 + - uses: actionshub/markdownlint@main with: tox_env: "format_check" \ No newline at end of file diff --git a/.markdown_style.rb b/.markdown_style.rb new file mode 100644 index 0000000..61f127b --- /dev/null +++ 
b/.markdown_style.rb @@ -0,0 +1,3 @@ +all +exclude_tag :line_length +rule 'MD007', :indent => 4 diff --git a/tox.ini b/tox.ini index 672927b..88df732 100644 --- a/tox.ini +++ b/tox.ini @@ -17,9 +17,11 @@ deps = isort black skip_install = True +allowlist_externals = mdl commands = isort bdfr tests --check black bdfr tests --line-length 120 --check + mdl README.md docs/ -s .markdown_style.rb [isort] profile = black From 921b2d08882636584dcad4fdb88d9413bd8240a0 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 3 Dec 2022 16:34:52 +1000 Subject: [PATCH 34/76] Fix indents --- README.md | 216 +++++++++++++++++++++++++++--------------------------- 1 file changed, 108 insertions(+), 108 deletions(-) diff --git a/README.md b/README.md index 35159c2..dd65753 100644 --- a/README.md +++ b/README.md @@ -116,160 +116,160 @@ In case when the same option is specified both in the YAML file and in as a comm The following options are common between both the `archive` and `download` commands of the BDFR. - `directory` - - This is the directory to which the BDFR will download and place all files + - This is the directory to which the BDFR will download and place all files - `--authenticate` - - This flag will make the BDFR attempt to use an authenticated Reddit session - - See [Authentication](#authentication-and-security) for more details + - This flag will make the BDFR attempt to use an authenticated Reddit session + - See [Authentication](#authentication-and-security) for more details - `--config` - - If the path to a configuration file is supplied with this option, the BDFR will use the specified config - - See [Configuration Files](#configuration) for more details + - If the path to a configuration file is supplied with this option, the BDFR will use the specified config + - See [Configuration Files](#configuration) for more details - `--opts` - - Load options from a YAML file. - - Has higher prority than the global config file but lower than command-line arguments. - - See [opts_example.yaml](./opts_example.yaml) for an example file. + - Load options from a YAML file. + - Has higher prority than the global config file but lower than command-line arguments. + - See [opts_example.yaml](./opts_example.yaml) for an example file. 
- `--disable-module` - - Can be specified multiple times - - Disables certain modules from being used - - See [Disabling Modules](#disabling-modules) for more information and a list of module names + - Can be specified multiple times + - Disables certain modules from being used + - See [Disabling Modules](#disabling-modules) for more information and a list of module names - `--ignore-user` - - This will add a user to ignore - - Can be specified multiple times + - This will add a user to ignore + - Can be specified multiple times - `--include-id-file` - - This will add any submission with the IDs in the files provided - - Can be specified multiple times - - Format is one ID per line + - This will add any submission with the IDs in the files provided + - Can be specified multiple times + - Format is one ID per line - `--log` - - This allows one to specify the location of the logfile - - This must be done when running multiple instances of the BDFR, see [Multiple Instances](#multiple-instances) below + - This allows one to specify the location of the logfile + - This must be done when running multiple instances of the BDFR, see [Multiple Instances](#multiple-instances) below - `--saved` - - This option will make the BDFR use the supplied user's saved posts list as a download source - - This requires an authenticated Reddit instance, using the `--authenticate` flag, as well as `--user` set to `me` + - This option will make the BDFR use the supplied user's saved posts list as a download source + - This requires an authenticated Reddit instance, using the `--authenticate` flag, as well as `--user` set to `me` - `--search` - - This will apply the input search term to specific lists when scraping submissions - - A search term can only be applied when using the `--subreddit` and `--multireddit` flags + - This will apply the input search term to specific lists when scraping submissions + - A search term can only be applied when using the `--subreddit` and `--multireddit` flags - `--submitted` - - This will use a user's submissions as a source - - A user must be specified with `--user` + - This will use a user's submissions as a source + - A user must be specified with `--user` - `--upvoted` - - This will use a user's upvoted posts as a source of posts to scrape - - This requires an authenticated Reddit instance, using the `--authenticate` flag, as well as `--user` set to `me` + - This will use a user's upvoted posts as a source of posts to scrape + - This requires an authenticated Reddit instance, using the `--authenticate` flag, as well as `--user` set to `me` - `-L, --limit` - - This is the limit on the number of submissions retrieve - - Default is max possible - - Note that this limit applies to **each source individually** e.g. if a `--limit` of 10 and three subreddits are provided, then 30 total submissions will be scraped - - If it is not supplied, then the BDFR will default to the maximum allowed by Reddit, roughly 1000 posts. **We cannot bypass this.** + - This is the limit on the number of submissions retrieve + - Default is max possible + - Note that this limit applies to **each source individually** e.g. if a `--limit` of 10 and three subreddits are provided, then 30 total submissions will be scraped + - If it is not supplied, then the BDFR will default to the maximum allowed by Reddit, roughly 1000 posts. 
**We cannot bypass this.** - `-S, --sort` - - This is the sort type for each applicable submission source supplied to the BDFR - - This option does not apply to upvoted or saved posts when scraping from these sources - - The following options are available: - - `controversial` - - `hot` (default) - - `new` - - `relevance` (only available when using `--search`) - - `rising` - - `top` + - This is the sort type for each applicable submission source supplied to the BDFR + - This option does not apply to upvoted or saved posts when scraping from these sources + - The following options are available: + - `controversial` + - `hot` (default) + - `new` + - `relevance` (only available when using `--search`) + - `rising` + - `top` - `-l, --link` - - This is a direct link to a submission to download, either as a URL or an ID - - Can be specified multiple times + - This is a direct link to a submission to download, either as a URL or an ID + - Can be specified multiple times - `-m, --multireddit` - - This is the name of a multireddit to add as a source - - Can be specified multiple times - - This can be done by using `-m` multiple times - - Multireddits can also be used to provide CSV multireddits e.g. `-m 'chess, favourites'` - - The specified multireddits must all belong to the user specified with the `--user` option + - This is the name of a multireddit to add as a source + - Can be specified multiple times + - This can be done by using `-m` multiple times + - Multireddits can also be used to provide CSV multireddits e.g. `-m 'chess, favourites'` + - The specified multireddits must all belong to the user specified with the `--user` option - `-s, --subreddit` - - This adds a subreddit as a source - - Can be used multiple times - - This can be done by using `-s` multiple times - - Subreddits can also be used to provide CSV subreddits e.g. `-s 'all, python, mindustry'` + - This adds a subreddit as a source + - Can be used multiple times + - This can be done by using `-s` multiple times + - Subreddits can also be used to provide CSV subreddits e.g.
`-s 'all, python, mindustry'` - `-t, --time` - - This is the time filter that will be applied to all applicable sources - - This option does not apply to upvoted or saved posts when scraping from these sources - - The following options are available: - - `all` (default) - - `hour` - - `day` - - `week` - - `month` - - `year` - - `--time-format` - - This specifies the format of the datetime string that replaces `{DATE}` in file and folder naming schemes - - See [Time Formatting Customisation](#time-formatting-customisation) for more details, and the formatting scheme + - This is the time filter that will be applied to all applicable sources + - This option does not apply to upvoted or saved posts when scraping from these sources + - The following options are available: + - `all` (default) + - `hour` + - `day` + - `week` + - `month` + - `year` + - `--time-format` + - This specifies the format of the datetime string that replaces `{DATE}` in file and folder naming schemes + - See [Time Formatting Customisation](#time-formatting-customisation) for more details, and the formatting scheme - `-u, --user` - - This specifies the user to scrape in concert with other options - - When using `--authenticate`, `--user me` can be used to refer to the authenticated user - - Can be specified multiple times for multiple users - - If downloading a multireddit, only one user can be specified + - This specifies the user to scrape in concert with other options + - When using `--authenticate`, `--user me` can be used to refer to the authenticated user + - Can be specified multiple times for multiple users + - If downloading a multireddit, only one user can be specified - `-v, --verbose` - - Increases the verbosity of the program - - Can be specified multiple times + - Increases the verbosity of the program + - Can be specified multiple times ### Downloader Options The following options apply only to the `download` command. This command downloads the files and resources linked to in the submission, or a text submission itself, to the disk in the specified directory.
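Several of the options listed below (`--skip`, `--skip-domain`, and the score flags) feed a download filter that decides whether a candidate resource is fetched at all. As a rough illustration of what the extension and domain checks amount to — a minimal sketch with hypothetical names, not the BDFR's actual `DownloadFilter` API:

```python
import re


def passes_filter(url: str, excluded_extensions: list[str], excluded_domains: list[str]) -> bool:
    """Hypothetical sketch of the --skip and --skip-domain checks on a candidate URL."""
    for extension in excluded_extensions:
        # e.g. --skip mp4 would drop https://example.com/video.mp4
        if url.rstrip("/").endswith(f".{extension}"):
            return False
    for domain in excluded_domains:
        # e.g. --skip-domain example.com; this sketch also matches subdomains like img.example.com
        if re.search(rf"https?://([a-z0-9-]+\.)*{re.escape(domain)}/", url):
            return False
    return True
```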
- `--make-hard-links` - - This flag will create hard links to an existing file when a duplicate is downloaded - - This will make the file appear in multiple directories while only taking the space of a single instance + - This flag will create hard links to an existing file when a duplicate is downloaded + - This will make the file appear in multiple directories while only taking the space of a single instance - `--max-wait-time` - - This option specifies the maximum wait time for downloading a resource - - The default is 120 seconds - - See [Rate Limiting](#rate-limiting) for details + - This option specifies the maximum wait time for downloading a resource + - The default is 120 seconds + - See [Rate Limiting](#rate-limiting) for details - `--no-dupes` - - This flag will not redownload files if they already exist somewhere in the root folder tree - - This is calculated by MD5 hash + - This flag will not redownload files if they already exist somewhere in the root folder tree + - This is calculated by MD5 hash - `--search-existing` - - This will make the BDFR compile the hashes for every file in `directory` and store them to remove duplicates if `--no-dupes` is also supplied + - This will make the BDFR compile the hashes for every file in `directory` and store them to remove duplicates if `--no-dupes` is also supplied - `--file-scheme` - - Sets the scheme for files - - Default is `{REDDITOR}_{TITLE}_{POSTID}` - - See [Folder and File Name Schemes](#folder-and-file-name-schemes) for more details + - Sets the scheme for files + - Default is `{REDDITOR}_{TITLE}_{POSTID}` + - See [Folder and File Name Schemes](#folder-and-file-name-schemes) for more details - `--folder-scheme` - - Sets the scheme for folders - - Default is `{SUBREDDIT}` - - See [Folder and File Name Schemes](#folder-and-file-name-schemes) for more details + - Sets the scheme for folders + - Default is `{SUBREDDIT}` + - See [Folder and File Name Schemes](#folder-and-file-name-schemes) for more details - `--exclude-id` - - This will skip the download of any submission with the ID provided - - Can be specified multiple times + - This will skip the download of any submission with the ID provided + - Can be specified multiple times - `--exclude-id-file` - - This will skip the download of any submission with any of the IDs in the files provided - - Can be specified multiple times - - Format is one ID per line + - This will skip the download of any submission with any of the IDs in the files provided + - Can be specified multiple times + - Format is one ID per line - `--skip-domain` - - This adds domains to the download filter i.e. submissions coming from these domains will not be downloaded - - Can be specified multiple times - - Domains must be supplied in the form `example.com` or `img.example.com` + - This adds domains to the download filter i.e. submissions coming from these domains will not be downloaded + - Can be specified multiple times + - Domains must be supplied in the form `example.com` or `img.example.com` - `--skip` - - This adds file types to the download filter i.e. submissions with one of the supplied file extensions will not be downloaded - - Can be specified multiple times + - This adds file types to the download filter i.e. 
submissions with one of the supplied file extensions will not be downloaded + - Can be specified multiple times - `--skip-subreddit` - - This skips all submissions from the specified subreddit - - Can be specified multiple times - - Also accepts CSV subreddit names + - This skips all submissions from the specified subreddit + - Can be specified multiple times + - Also accepts CSV subreddit names - `--min-score` - - This skips all submissions which have fewer than specified upvotes + - This skips all submissions which have fewer than specified upvotes - `--max-score` - - This skips all submissions which have more than specified upvotes + - This skips all submissions which have more than specified upvotes - `--min-score-ratio` - - This skips all submissions which have lower than specified upvote ratio + - This skips all submissions which have lower than specified upvote ratio - `--max-score-ratio` - - This skips all submissions which have higher than specified upvote ratio + - This skips all submissions which have higher than specified upvote ratio ### Archiver Options The following options are for the `archive` command specifically. - `--all-comments` - - When combined with the `--user` option, this will download all the user's comments + - When combined with the `--user` option, this will download all the user's comments - `-f, --format` - - This specifies the format of the data file saved to disk - - The following formats are available: - - `json` (default) - - `xml` - - `yaml` + - This specifies the format of the data file saved to disk + - The following formats are available: + - `json` (default) + - `xml` + - `yaml` - `--comment-context` - - This option will, instead of downloading an individual comment, download the submission that comment is a part of - - May result in a longer run time as it retrieves much more data + - This option will, instead of downloading an individual comment, download the submission that comment is a part of + - May result in a longer run time as it retrieves much more data ### Cloner Options @@ -426,7 +426,7 @@ The logfiles that the BDFR outputs are consistent and quite detailed and in a fo - Redgifs - Vidble - YouTube - - Any source supported by [YT-DLP](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md) should be compatible + - Any source supported by [YT-DLP](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md) should be compatible ## Contributing From bfd2d31b7b0e94094d389207f7611a58c20ab205 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 3 Dec 2022 16:46:32 +1000 Subject: [PATCH 35/76] Update markdown style --- .markdown_style.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/.markdown_style.rb b/.markdown_style.rb index 61f127b..32ee0b1 100644 --- a/.markdown_style.rb +++ b/.markdown_style.rb @@ -1,3 +1,4 @@ all exclude_tag :line_length rule 'MD007', :indent => 4 +rule 'MD029', :style => 'ordered' From 8feb6517f1be0d303792f1639845f11267f3d22e Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 3 Dec 2022 16:46:45 +1000 Subject: [PATCH 36/76] Format supporting documents correctly --- docs/ARCHITECTURE.md | 36 ++++++++++++++---------------------- docs/CONTRIBUTING.md | 24 ++++++++++++------------ 2 files changed, 26 insertions(+), 34 deletions(-) diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 33d4297..8fc4e13 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -6,11 +6,11 @@ When the project was rewritten for v2, the goal was to make the codebase easily The BDFR is designed to be a stateless
downloader. This means that the state of the program is forgotten between each run of the program. There are no central lists, databases, or indices that the BDFR uses, only the actual files on disk. There are several advantages to this approach: - 1. There is no chance of the database being corrupted or changed by something other than the BDFR, rendering the BDFR's "idea" of the archive wrong or incomplete. - 2. Any information about the archive is contained by the archive itself i.e. for a list of all submission IDs in the archive, this can be extracted from the names of the files in said archive, assuming an appropriate naming scheme was used. - 3. Archives can be merged, split, or edited without worrying about having to update a central database - 4. There are no versioning issues between updates of the BDFR, where old versions are stuck with a worse form of the database - 5. An archive can be put on a USB, moved to another computer with possibly a very different BDFR version, and work completely fine +1. There is no chance of the database being corrupted or changed by something other than the BDFR, rendering the BDFR's "idea" of the archive wrong or incomplete. +2. Any information about the archive is contained by the archive itself i.e. for a list of all submission IDs in the archive, this can be extracted from the names of the files in said archive, assuming an appropriate naming scheme was used. +3. Archives can be merged, split, or edited without worrying about having to update a central database +4. There are no versioning issues between updates of the BDFR, where old versions are stuck with a worse form of the database +5. An archive can be put on a USB, moved to another computer with possibly a very different BDFR version, and work completely fine Another major part of the ethos of the design is DOTADIW, Do One Thing And Do It Well. It's a major part of Unix philosophy and states that each tool should have a well-defined, limited purpose. To this end, the BDFR is, as the name implies, a *downloader*. That is the scope of the tool. Managing the files downloaded can be for better-suited programs, since the BDFR is not a file manager. Nor does the BDFR concern itself with how any of the data downloaded is displayed, changed, parsed, or analysed. This makes the BDFR suitable for data science-related tasks, archiving, personal downloads, or analysis of various Reddit sources as the BDFR is completely agnostic on how the data is used. @@ -18,23 +18,15 @@ Another major part of the ethos of the design is DOTADIW, Do One Thing And Do It The BDFR is organised around a central object, the RedditDownloader class. The Archiver object extends and inherits from this class. - 1. The RedditDownloader parses all the arguments and configuration options, held in the Configuration object, and creates a variety of internal objects for use, such as the file name formatter, download filter, etc. - - 2. The RedditDownloader scrapes raw submissions from Reddit via several methods relating to different sources. A source is defined as a single stream of submissions from a subreddit, multireddit, or user list. - - 3. These raw submissions are passed to the DownloaderFactory class to select the specialised downloader class to use. Each of these are for a specific website or link type, with some catch-all classes like Direct. - - 4. The BaseDownloader child, spawned by DownloaderFactory, takes the link and does any necessary processing to find the direct link to the actual resource. - - 5.
This is returned to the RedditDownloader in the form of a Resource object. This holds the URL and some other information for the final resource. - - 6. The Resource is passed through the DownloadFilter instantiated in step 1. - - 7. The destination file name for the Resource is calculated. If it already exists, then the Resource will be discarded. - - 8. Here the actual data is downloaded to the Resource and a hash calculated which is used to find duplicates. - - 9. Only then is the Resource written to the disk. +1. The RedditDownloader parses all the arguments and configuration options, held in the Configuration object, and creates a variety of internal objects for use, such as the file name formatter, download filter, etc. +2. The RedditDownloader scrapes raw submissions from Reddit via several methods relating to different sources. A source is defined as a single stream of submissions from a subreddit, multireddit, or user list. +3. These raw submissions are passed to the DownloaderFactory class to select the specialised downloader class to use. Each of these are for a specific website or link type, with some catch-all classes like Direct. +4. The BaseDownloader child, spawned by DownloaderFactory, takes the link and does any necessary processing to find the direct link to the actual resource. +5. This is returned to the RedditDownloader in the form of a Resource object. This holds the URL and some other information for the final resource. +6. The Resource is passed through the DownloadFilter instantiated in step 1. +7. The destination file name for the Resource is calculated. If it already exists, then the Resource will be discarded. +8. Here the actual data is downloaded to the Resource and a hash calculated which is used to find duplicates. +9. Only then is the Resource written to the disk. This is the step-by-step process that the BDFR goes through to download a Reddit post. diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 43d26f7..96e5e19 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -26,13 +26,13 @@ Before creating a pull request (PR), check out [ARCHITECTURE](ARCHITECTURE.md) f Once you have done both of these, the below list shows the path that should be followed when writing a PR. - 1. If an issue does not already exist, open one that will relate to the PR. - 2. Ensure that any changes fit into the architecture specified above. - 3. Ensure that you have written tests that cover the new code. - 4. Ensure that no existing tests fail, unless there is a good reason for them to do so. - 5. If needed, update any documentation with changes. - 6. Open a pull request that references the relevant issue. - 7. Expect changes or suggestions and heed the Code of Conduct. We're all volunteers here. +1. If an issue does not already exist, open one that will relate to the PR. +2. Ensure that any changes fit into the architecture specified above. +3. Ensure that you have written tests that cover the new code. +4. Ensure that no existing tests fail, unless there is a good reason for them to do so. +5. If needed, update any documentation with changes. +6. Open a pull request that references the relevant issue. +7. Expect changes or suggestions and heed the Code of Conduct. We're all volunteers here. Someone will review your pull request as soon as possible, but remember that all maintainers are volunteers and this won't happen immediately. Once it is approved, congratulations! Your code is now part of the BDFR. 
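The nine-step pipeline reformatted in the ARCHITECTURE.md hunk above condenses into a short driver loop. The following is a schematic sketch only — the attribute and method names hanging off `downloader` (`factory.pull`, `download_filter.check`, `formatter.path_for`) are illustrative stand-ins for the document's descriptions, not the BDFR's actual API:

```python
def download_all(downloader) -> None:
    """Schematic of pipeline steps 2-9; step 1 (parsing the Configuration) happens beforehand."""
    for generator in downloader.reddit_lists:  # 2. one generator per source (subreddit, user, ...)
        for submission in generator:
            site_class = downloader.factory.pull(submission.url)  # 3. DownloaderFactory picks a class
            site_downloader = site_class(submission)  # 4. BaseDownloader child resolves direct links
            for resource in site_downloader.find_resources():  # 5. Resource objects hold the URLs
                if not downloader.download_filter.check(resource.url):  # 6. apply the DownloadFilter
                    continue
                destination = downloader.formatter.path_for(resource)  # 7. compute destination name
                if destination.exists():
                    continue  # an existing file means the Resource is discarded
                resource.download()  # 8. fetch the data and hash it for duplicate detection
                destination.write_bytes(resource.content)  # 9. only now is anything written to disk
```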
@@ -87,14 +87,14 @@ When submitting a PR, it is required that you run **all** possible tests to ensu This is accomplished with marks, a system that pytest uses to categorise tests. These are the marks currently in use in the BDFR test suite. - `slow` - - This marks a test that may take a long time to complete - - Usually marks a test that downloads many submissions or downloads a particularly large resource + - This marks a test that may take a long time to complete + - Usually marks a test that downloads many submissions or downloads a particularly large resource - `online` - - This marks a test that requires an internet connection and uses online resources + - This marks a test that requires an internet connection and uses online resources - `reddit` - - This marks a test that accesses online Reddit specifically + - This marks a test that accesses online Reddit specifically - `authenticated` - - This marks a test that requires a test configuration file with a valid OAuth2 token + - This marks a test that requires a test configuration file with a valid OAuth2 token These tests can be run either all at once, or excluding certain marks. The tests that require online resources, such as those marked `reddit` or `online`, will naturally require more time to run than tests that are entirely offline. To run tests, you must be in the root directory of the project and can use the following command. From 47e49a2e98985f6aeef6a7971557dca9bcaa66a5 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 3 Dec 2022 16:48:11 +1000 Subject: [PATCH 37/76] Reorder workflow dependencies --- .github/workflows/formatting_check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/formatting_check.yml b/.github/workflows/formatting_check.yml index 4941d83..39848c8 100644 --- a/.github/workflows/formatting_check.yml +++ b/.github/workflows/formatting_check.yml @@ -6,7 +6,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: paolorechia/pox@v1.0.1 - uses: actionshub/markdownlint@main + - uses: paolorechia/pox@v1.0.1 with: tox_env: "format_check" \ No newline at end of file From 8cfc3140380dc9f29c215228f4567b9e6b451d68 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 3 Dec 2022 16:51:18 +1000 Subject: [PATCH 38/76] Install mdl in workflow --- .github/workflows/formatting_check.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/formatting_check.yml b/.github/workflows/formatting_check.yml index 39848c8..79ccb4b 100644 --- a/.github/workflows/formatting_check.yml +++ b/.github/workflows/formatting_check.yml @@ -5,8 +5,9 @@ jobs: formatting_check: runs-on: ubuntu-latest steps: + - name: Install dependencies + run: gem install mdl - uses: actions/checkout@v3 - - uses: actionshub/markdownlint@main - uses: paolorechia/pox@v1.0.1 with: tox_env: "format_check" \ No newline at end of file From 614c19be109effff93e47ae26f0ba9bf5503a9e6 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 3 Dec 2022 16:56:44 +1000 Subject: [PATCH 39/76] Fix workflow error --- .github/workflows/formatting_check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/formatting_check.yml b/.github/workflows/formatting_check.yml index 79ccb4b..deb44d5 100644 --- a/.github/workflows/formatting_check.yml +++ b/.github/workflows/formatting_check.yml @@ -6,7 +6,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Install dependencies - run: gem install mdl + run: sudo gem install mdl - uses: actions/checkout@v3 -
uses: paolorechia/pox@v1.0.1 with: From d4bfe8fa194b7dac3b8c71a6fb7af1644d45bc95 Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Sat, 3 Dec 2022 14:49:39 -0500 Subject: [PATCH 40/76] Formatting cleanup Cleanup some formatting from switch to Black --- bdfr/archiver.py | 2 +- tests/test_downloader.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/bdfr/archiver.py b/bdfr/archiver.py index 3d0d31b..28a270b 100644 --- a/bdfr/archiver.py +++ b/bdfr/archiver.py @@ -35,7 +35,7 @@ class Archiver(RedditConnector): ): logger.debug( f"Submission {submission.id} in {submission.subreddit.display_name} skipped" - f' due to {submission.author.name if submission.author else "DELETED"} being an ignored user' + f" due to {submission.author.name if submission.author else 'DELETED'} being an ignored user" ) continue if submission.id in self.excluded_submission_ids: diff --git a/tests/test_downloader.py b/tests/test_downloader.py index 7b81a85..ba81b80 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -167,9 +167,7 @@ def test_download_submission_file_exists( folder_contents = list(tmp_path.iterdir()) output = capsys.readouterr() assert len(folder_contents) == 1 - assert ( - "Arneeman_Metagaming isn't always a bad thing_m1hqw6.png" " from submission m1hqw6 already exists" in output.out - ) + assert "Arneeman_Metagaming isn't always a bad thing_m1hqw6.png from submission m1hqw6 already exists" in output.out @pytest.mark.online From 8af00b20bcb9ce90479c6f93569ae53b892a53de Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 4 Dec 2022 14:49:37 +1000 Subject: [PATCH 41/76] Move formatter settings --- pyproject.toml | 7 +++++++ tox.ini | 9 ++------- 2 files changed, 9 insertions(+), 7 deletions(-) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4dced2f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,7 @@ +[tool.black] +line-length = 120 + +[tool.isort] +profile = "black" +multi_line_output = 3 +line_length = 120 diff --git a/tox.ini b/tox.ini index 88df732..b50927c 100644 --- a/tox.ini +++ b/tox.ini @@ -10,7 +10,7 @@ deps = skip_install = True commands = isort bdfr tests - black bdfr tests --line-length 120 + black bdfr tests [testenv:format_check] deps = @@ -20,10 +20,5 @@ skip_install = True allowlist_externals = mdl commands = isort bdfr tests --check - black bdfr tests --line-length 120 --check + black bdfr tests --check mdl README.md docs/ -s .markdown_style.rb - -[isort] -profile = black -multi_line_output = 3 -line_length = 120 From 7e3b11caf851cb524b80504395816f6f5f7b1637 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 4 Dec 2022 14:58:54 +1000 Subject: [PATCH 42/76] Add support for pre-commit --- .pre-commit-config.yaml | 19 +++++++++++++++++++ dev-requirements.txt | 1 + 2 files changed, 20 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..7013228 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,19 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks + +repos: + - repo: https://github.com/psf/black + rev: 22.10.0 + hooks: + - id: black + + - repo: https://github.com/pycqa/isort + rev: 5.10.1 + hooks: + - id: isort + name: isort (python) + + - repo: https://github.com/markdownlint/markdownlint + rev: v0.12.0 + hooks: + - id: markdownlint diff --git a/dev-requirements.txt 
b/dev-requirements.txt index af48d1b..34597e7 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,4 +1,5 @@ black isort +pre-commit pytest tox From 628739d0b8320f5a76c5badb302fb90dc23732d0 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Thu, 8 Dec 2022 15:46:53 +1000 Subject: [PATCH 43/76] Add markdownlint default file --- .mdlrc | 1 + tox.ini | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 .mdlrc diff --git a/.mdlrc b/.mdlrc new file mode 100644 index 0000000..dd06c8c --- /dev/null +++ b/.mdlrc @@ -0,0 +1 @@ +style "#{File.dirname(__FILE__)}/.markdown_style.rb" diff --git a/tox.ini b/tox.ini index b50927c..be451e6 100644 --- a/tox.ini +++ b/tox.ini @@ -21,4 +21,4 @@ allowlist_externals = mdl commands = isort bdfr tests --check black bdfr tests --check - mdl README.md docs/ -s .markdown_style.rb + mdl README.md docs/ From 1bc20f238e1c0eba396d1ed3d2e329f2454c0ced Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Thu, 8 Dec 2022 15:46:58 +1000 Subject: [PATCH 44/76] Update CONTRIBUTING to include new tools --- docs/CONTRIBUTING.md | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 96e5e19..ea2d37f 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -58,23 +58,36 @@ Then, you can run the program from anywhere in your disk as such: bdfr ``` -## Style Guide +There are additional Python packages that are required to develop the BDFR. These can be installed with the following command: -The BDFR must conform to PEP8 standard wherever there is Python code, with one exception. Line lengths may extend to 120 characters, but all other PEP8 standards must be followed. - -It's easy to format your code without any manual work via a variety of tools. Autopep8 is a good one, and can be used with `autopep8 --max-line-length 120` which will format the code according to the style in use with the BDFR. - -Hanging brackets are preferred when there are many items, items that otherwise go over the 120 character line limit, or when doing so would increase readability. It is also preferred when there might be many commits altering the list, such as with the parameter lists for tests. A hanging comma is also required in such cases. An example of this is below: - -```python -test = [ - 'test 1', - 'test 2', - 'test 3', -] +```bash +python3 -m pip install -r dev-requirements.txt ``` -Note that the last bracket is on its own line, and that the first bracket has a new line before the first term. Also note that there is a comma after the last term. +### Tools + +The BDFR project uses several tools to manage the code of the project. These include: + +- [black](https://github.com/psf/black) +- [isort](https://github.com/PyCQA/isort) +- [markdownlint (mdl)](https://github.com/markdownlint/markdownlint) +- [tox](https://tox.wiki/en/latest/) +- [pre-commit](https://github.com/pre-commit/pre-commit) + +The first three tools are formatters. These change the code to the standards expected for the BDFR project. The configuration details for these tools are contained in the [pyproject.toml](../pyproject.toml) file for the project. + +The tool `tox` is used to run tests and tools on demand and has the following environments: + +- `format` +- `format_check` + +The tool `pre-commit` is optional, and runs the three formatting tools automatically when a commit is made. This is **highly recommended** to ensure that all code submitted for this project is formatted acceptably. 
Note that any PR that does not follow the formatting guide will not be accepted. For information on how to use pre-commit to avoid this, see [the pre-commit documentation](https://pre-commit.com/). + +## Style Guide + +The BDFR uses the Black formatting standard and enforces this with the tool by the same name. Additionally, the tool isort is used to format imports. + +See [Preparing the Environment for Development](#preparing-the-environment-for-development) for how to set up these tools to run automatically. ## Tests From 3aa740e979ffd976508009a8e4c80934eeb3eaeb Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Sat, 10 Dec 2022 12:36:54 -0500 Subject: [PATCH 45/76] Add soft fail on 5xx Prawcore errors. --- bdfr/connector.py | 44 +++++++++++++++++++++++------------------- bdfr/downloader.py | 16 +++++++++++----- 2 files changed, 36 insertions(+), 24 deletions(-) diff --git a/bdfr/connector.py b/bdfr/connector.py index ea970db..e5d74a2 100644 --- a/bdfr/connector.py +++ b/bdfr/connector.py @@ -13,6 +13,7 @@ from abc import ABCMeta, abstractmethod from datetime import datetime from enum import Enum, auto from pathlib import Path +from time import sleep from typing import Callable, Iterator import appdirs @@ -353,26 +354,31 @@ class RedditConnector(metaclass=ABCMeta): generators = [] for user in self.args.user: try: - self.check_user_existence(user) - except errors.BulkDownloaderException as e: - logger.error(e) - continue - if self.args.submitted: - logger.debug(f"Retrieving submitted posts of user {self.args.user}") - generators.append( - self.create_filtered_listing_generator( - self.reddit_instance.redditor(user).submissions, + try: + self.check_user_existence(user) + except errors.BulkDownloaderException as e: + logger.error(e) + continue + if self.args.submitted: + logger.debug(f"Retrieving submitted posts of user {user}") + generators.append( + self.create_filtered_listing_generator( + self.reddit_instance.redditor(user).submissions, ) - ) - if not self.authenticated and any((self.args.upvoted, self.args.saved)): - logger.warning("Accessing user lists requires authentication") - else: - if self.args.upvoted: - logger.debug(f"Retrieving upvoted posts of user {self.args.user}") - generators.append(self.reddit_instance.redditor(user).upvoted(limit=self.args.limit)) - if self.args.saved: - logger.debug(f"Retrieving saved posts of user {self.args.user}") - generators.append(self.reddit_instance.redditor(user).saved(limit=self.args.limit)) + ) + if not self.authenticated and any((self.args.upvoted, self.args.saved)): + logger.warning("Accessing user lists requires authentication") + else: + if self.args.upvoted: + logger.debug(f"Retrieving upvoted posts of user {user}") + generators.append(self.reddit_instance.redditor(user).upvoted(limit=self.args.limit)) + if self.args.saved: + logger.debug(f"Retrieving saved posts of user {user}") + generators.append(self.reddit_instance.redditor(user).saved(limit=self.args.limit)) + except prawcore.PrawcoreException as e: + logger.error(f"User {user} failed to be retrieved due to a PRAW exception: {e}") + logger.debug("Waiting 60 seconds to continue") + sleep(60) return generators else: return [] diff --git a/bdfr/downloader.py b/bdfr/downloader.py index fa5d10c..1cb6d46 100644 --- a/bdfr/downloader.py +++ b/bdfr/downloader.py @@ -8,6 +8,7 @@ import time from datetime import datetime from multiprocessing import Pool from pathlib import Path +from time import sleep import praw import praw.exceptions @@
-42,11 +43,16 @@ class RedditDownloader(RedditConnector): def download(self): for generator in self.reddit_lists: - for submission in generator: - try: - self._download_submission(submission) - except prawcore.PrawcoreException as e: - logger.error(f"Submission {submission.id} failed to download due to a PRAW exception: {e}") + try: + for submission in generator: + try: + self._download_submission(submission) + except prawcore.PrawcoreException as e: + logger.error(f"Submission {submission.id} failed to download due to a PRAW exception: {e}") + except prawcore.PrawcoreException as e: + logger.error(f"The submission after {submission.id} failed to download due to a PRAW exception: {e}") + logger.debug("Waiting 60 seconds to continue") + sleep(60) def _download_submission(self, submission: praw.models.Submission): if submission.id in self.excluded_submission_ids: From ac91c9089c652e77a23ea44a556632e4d8e17636 Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Sat, 10 Dec 2022 21:19:29 -0500 Subject: [PATCH 46/76] Add 5xx soft fail for clone/archive --- bdfr/archiver.py | 40 +++++++++++++++++++++++----------------- bdfr/cloner.py | 18 ++++++++++++------ 2 files changed, 35 insertions(+), 23 deletions(-) diff --git a/bdfr/archiver.py b/bdfr/archiver.py index 28a270b..e2ed33d 100644 --- a/bdfr/archiver.py +++ b/bdfr/archiver.py @@ -4,6 +4,7 @@ import json import logging import re +from time import sleep from typing import Iterator, Union import dict2xml @@ -28,23 +29,28 @@ class Archiver(RedditConnector): def download(self): for generator in self.reddit_lists: - for submission in generator: - try: - if (submission.author and submission.author.name in self.args.ignore_user) or ( - submission.author is None and "DELETED" in self.args.ignore_user - ): - logger.debug( - f"Submission {submission.id} in {submission.subreddit.display_name} skipped" - f" due to {submission.author.name if submission.author else 'DELETED'} being an ignored user" - ) - continue - if submission.id in self.excluded_submission_ids: - logger.debug(f"Object {submission.id} in exclusion list, skipping") - continue - logger.debug(f"Attempting to archive submission {submission.id}") - self.write_entry(submission) - except prawcore.PrawcoreException as e: - logger.error(f"Submission {submission.id} failed to be archived due to a PRAW exception: {e}") + try: + for submission in generator: + try: + if (submission.author and submission.author.name in self.args.ignore_user) or ( + submission.author is None and "DELETED" in self.args.ignore_user + ): + logger.debug( + f"Submission {submission.id} in {submission.subreddit.display_name} skipped due to" + f" {submission.author.name if submission.author else 'DELETED'} being an ignored user" + ) + continue + if submission.id in self.excluded_submission_ids: + logger.debug(f"Object {submission.id} in exclusion list, skipping") + continue + logger.debug(f"Attempting to archive submission {submission.id}") + self.write_entry(submission) + except prawcore.PrawcoreException as e: + logger.error(f"Submission {submission.id} failed to be archived due to a PRAW exception: {e}") + except prawcore.PrawcoreException as e: + logger.error(f"The submission after {submission.id} failed to download due to a PRAW exception: {e}") + logger.debug("Waiting 60 seconds to continue") + sleep(60) def get_submissions_from_link(self) -> list[list[praw.models.Submission]]: supplied_submissions = [] diff --git a/bdfr/cloner.py b/bdfr/cloner.py index c26d17b..e82cfaa 100644 --- 
a/bdfr/cloner.py +++ b/bdfr/cloner.py @@ -2,6 +2,7 @@ # coding=utf-8 import logging +from time import sleep import prawcore @@ -18,9 +19,14 @@ class RedditCloner(RedditDownloader, Archiver): def download(self): for generator in self.reddit_lists: - for submission in generator: - try: - self._download_submission(submission) - self.write_entry(submission) - except prawcore.PrawcoreException as e: - logger.error(f"Submission {submission.id} failed to be cloned due to a PRAW exception: {e}") + try: + for submission in generator: + try: + self._download_submission(submission) + self.write_entry(submission) + except prawcore.PrawcoreException as e: + logger.error(f"Submission {submission.id} failed to be cloned due to a PRAW exception: {e}") + except prawcore.PrawcoreException as e: + logger.error(f"The submission after {submission.id} failed to download due to a PRAW exception: {e}") + logger.debug("Waiting 60 seconds to continue") + sleep(60) From 15a9d25a9db82ab543b9775349a90d2a69c3f5a7 Mon Sep 17 00:00:00 2001 From: Soulsuck24 <79275800+Soulsuck24@users.noreply.github.com> Date: Sun, 11 Dec 2022 14:20:04 -0500 Subject: [PATCH 47/76] Imgur webp coverage update regex to catch _d in webp links from imgur. --- bdfr/site_downloaders/imgur.py | 2 +- tests/site_downloaders/test_imgur.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/bdfr/site_downloaders/imgur.py b/bdfr/site_downloaders/imgur.py index f91e34f..0b9ecdd 100644 --- a/bdfr/site_downloaders/imgur.py +++ b/bdfr/site_downloaders/imgur.py @@ -41,7 +41,7 @@ class Imgur(BaseDownloader): @staticmethod def _get_data(link: str) -> dict: try: - imgur_id = re.match(r".*/(.*?)(\..{0,})?$", link).group(1) + imgur_id = re.match(r".*/(.*?)(_d)?(\..{0,})?$", link).group(1) gallery = "a/" if re.search(r".*/(.*?)(gallery/|a/)", link) else "" link = f"https://imgur.com/{gallery}{imgur_id}" except AttributeError: diff --git a/tests/site_downloaders/test_imgur.py b/tests/site_downloaders/test_imgur.py index 38dbdc5..6b49cd5 100644 --- a/tests/site_downloaders/test_imgur.py +++ b/tests/site_downloaders/test_imgur.py @@ -170,6 +170,10 @@ def test_imgur_extension_validation_bad(test_extension: str): "http://i.imgur.com/s9uXxlq.jpg?5.jpg", ("338de3c23ee21af056b3a7c154e2478f",), ), + ( + "https://i.imgur.com/2TtN68l_d.webp", + ("6569ab9ad9fa68d93f6b408f112dd741",), + ), ), ) def test_find_resources(test_url: str, expected_hashes: list[str]): From 4ba5df6b3728c08980ab1fe3f99bbf051a8168c3 Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Wed, 14 Dec 2022 23:04:33 -0500 Subject: [PATCH 48/76] 5xx error tests --- .../test_archive_integration.py | 29 +++++++++++++++++++ .../test_clone_integration.py | 29 +++++++++++++++++++ .../test_download_integration.py | 29 +++++++++++++++++++ 3 files changed, 87 insertions(+) diff --git a/tests/integration_tests/test_archive_integration.py b/tests/integration_tests/test_archive_integration.py index f10f37c..1c0d30a 100644 --- a/tests/integration_tests/test_archive_integration.py +++ b/tests/integration_tests/test_archive_integration.py @@ -4,7 +4,9 @@ import re import shutil from pathlib import Path +from unittest.mock import MagicMock, patch +import prawcore import pytest from click.testing import CliRunner @@ -176,3 +178,30 @@ def test_cli_archive_soft_fail(test_args: list[str], tmp_path: Path): assert result.exit_code == 0 assert "failed to be archived due to a PRAW exception" in result.output assert "Attempting to archive" not in result.output + + 
+@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + ("test_args", "response"), + ( + ( + ["--user", "nasa", "--submitted"], + 502, + ), + ( + ["--user", "nasa", "--submitted"], + 504, + ), + ), +) +def test_user_serv_fail(test_args: list[str], response: int, tmp_path: Path): + runner = CliRunner() + test_args = create_basic_args_for_archive_runner(test_args, tmp_path) + with patch("bdfr.connector.sleep", return_value=None): + with patch( + "bdfr.connector.RedditConnector.check_user_existence", + side_effect=prawcore.exceptions.ResponseException(MagicMock(status_code=response)), + ): + result = runner.invoke(cli, test_args) + assert result.exit_code == 0 + assert f"received {response} HTTP response" in result.output diff --git a/tests/integration_tests/test_clone_integration.py b/tests/integration_tests/test_clone_integration.py index e8dc008..eb64364 100644 --- a/tests/integration_tests/test_clone_integration.py +++ b/tests/integration_tests/test_clone_integration.py @@ -3,7 +3,9 @@ import shutil from pathlib import Path +from unittest.mock import MagicMock, patch +import prawcore import pytest from click.testing import CliRunner @@ -68,3 +70,30 @@ def test_cli_scrape_soft_fail(test_args: list[str], tmp_path: Path): assert result.exit_code == 0 assert "Downloaded submission" not in result.output assert "Record for entry item" not in result.output + + +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + ("test_args", "response"), + ( + ( + ["--user", "nasa", "--submitted"], + 502, + ), + ( + ["--user", "nasa", "--submitted"], + 504, + ), + ), +) +def test_user_serv_fail(test_args: list[str], response: int, tmp_path: Path): + runner = CliRunner() + test_args = create_basic_args_for_cloner_runner(test_args, tmp_path) + with patch("bdfr.connector.sleep", return_value=None): + with patch( + "bdfr.connector.RedditConnector.check_user_existence", + side_effect=prawcore.exceptions.ResponseException(MagicMock(status_code=response)), + ): + result = runner.invoke(cli, test_args) + assert result.exit_code == 0 + assert f"received {response} HTTP response" in result.output diff --git a/tests/integration_tests/test_download_integration.py b/tests/integration_tests/test_download_integration.py index 2ab38a0..e44c95e 100644 --- a/tests/integration_tests/test_download_integration.py +++ b/tests/integration_tests/test_download_integration.py @@ -3,7 +3,9 @@ import shutil from pathlib import Path +from unittest.mock import MagicMock, patch +import prawcore import pytest from click.testing import CliRunner @@ -396,3 +398,30 @@ def test_cli_download_score_filter(test_args: list[str], was_filtered: bool, tmp result = runner.invoke(cli, test_args) assert result.exit_code == 0 assert ("filtered due to score" in result.output) == was_filtered + + +@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests") +@pytest.mark.parametrize( + ("test_args", "response"), + ( + ( + ["--user", "nasa", "--submitted"], + 502, + ), + ( + ["--user", "nasa", "--submitted"], + 504, + ), + ), +) +def test_user_serv_fail(test_args: list[str], response: int, tmp_path: Path): + runner = CliRunner() + test_args = create_basic_args_for_download_runner(test_args, tmp_path) + with patch("bdfr.connector.sleep", return_value=None): + with patch( + "bdfr.connector.RedditConnector.check_user_existence", + 
side_effect=prawcore.exceptions.ResponseException(MagicMock(status_code=response)), + ): + result = runner.invoke(cli, test_args) + assert result.exit_code == 0 + assert f"received {response} HTTP response" in result.output From e32d322dbd2d5532201bdf5123e0dbe71ec0179f Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Fri, 16 Dec 2022 14:56:44 -0500 Subject: [PATCH 49/76] Add shell completions --- README.md | 10 +++++--- bdfr/__main__.py | 33 ++++++++++++++++++++++++++ bdfr/completion.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 3 deletions(-) create mode 100644 bdfr/completion.py diff --git a/README.md b/README.md index dd65753..4b634ec 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,9 @@ Included in this README are a few example Bash tricks to get certain behaviour. ## Installation -*Bulk Downloader for Reddit* needs Python version 3.9 or above. Please update Python before installation to meet the requirement. Then, you can install it via pip with: +*Bulk Downloader for Reddit* needs Python version 3.9 or above. Please update Python before installation to meet the requirement. + +Then, you can install it via pip with: ```bash python3 -m pip install bdfr --upgrade @@ -21,10 +23,12 @@ python3 -m pip install bdfr --upgrade or via [pipx](https://pypa.github.io/pipx) with: ```bash -python3 -m pipx install bdfr --upgrade +python3 -m pipx install bdfr ``` -**To update BDFR**, run the above command again after the installation. +**To update BDFR**, run the above command again for pip or `pipx upgrade bdfr` for pipx installations. + +**To install shell completions**, run `bdfr completions` ### AUR Package diff --git a/bdfr/__main__.py b/bdfr/__main__.py index c26f577..d0c6664 100644 --- a/bdfr/__main__.py +++ b/bdfr/__main__.py @@ -7,6 +7,7 @@ import click from bdfr.archiver import Archiver from bdfr.cloner import RedditCloner +from bdfr.completion import Completion from bdfr.configuration import Configuration from bdfr.downloader import RedditDownloader @@ -74,15 +75,19 @@ def _add_options(opts: list): @click.group() +@click.help_option("-h", "--help") def cli(): + """BDFR is used to download and archive content from Reddit.""" pass @cli.command("download") @_add_options(_common_options) @_add_options(_downloader_options) +@click.help_option("-h", "--help") @click.pass_context def cli_download(context: click.Context, **_): + """Used to download content posted to Reddit.""" config = Configuration() config.process_click_arguments(context) setup_logging(config.verbose) @@ -99,8 +104,10 @@ def cli_download(context: click.Context, **_): @cli.command("archive") @_add_options(_common_options) @_add_options(_archiver_options) +@click.help_option("-h", "--help") @click.pass_context def cli_archive(context: click.Context, **_): + """Used to archive post data from Reddit.""" config = Configuration() config.process_click_arguments(context) setup_logging(config.verbose) @@ -118,8 +125,10 @@ def cli_archive(context: click.Context, **_): @_add_options(_common_options) @_add_options(_archiver_options) @_add_options(_downloader_options) +@click.help_option("-h", "--help") @click.pass_context def cli_clone(context: click.Context, **_): + """Combines archive and download commands.""" config = Configuration() config.process_click_arguments(context) setup_logging(config.verbose) @@ -133,6 +142,30 @@ def cli_clone(context: click.Context, **_): logger.info("Program complete") +@cli.command("completion") 
+@click.argument("shell", type=click.Choice(("all", "bash", "fish", "zsh"), case_sensitive=False), default="all") +@click.help_option("-h", "--help") +@click.option("-u", "--uninstall", is_flag=True, default=False, help="Uninstall completion") +def cli_completion(shell: str, uninstall: bool): + """\b + Installs shell completions for BDFR. + Options: all, bash, fish, zsh + Default: all""" + shell = shell.lower() + if sys.platform == "win32": + print("Completions are not currently supported on Windows.") + return + if uninstall and click.confirm(f"Would you like to uninstall {shell} completions for BDFR"): + Completion(shell).uninstall() + return + if shell not in ("all", "bash", "fish", "zsh"): + print(f"{shell} is not a valid option.") + print("Options: all, bash, fish, zsh") + return + if click.confirm(f"Would you like to install {shell} completions for BDFR"): + Completion(shell).install() + + def setup_logging(verbosity: int): class StreamExceptionFilter(logging.Filter): def filter(self, record: logging.LogRecord) -> bool: diff --git a/bdfr/completion.py b/bdfr/completion.py new file mode 100644 index 0000000..1902319 --- /dev/null +++ b/bdfr/completion.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +# coding=utf-8 + +import os +import subprocess + +import appdirs + + +class Completion: + def __init__(self, shell: str): + self.shell = shell + self.env = os.environ.copy() + self.share_dir = appdirs.user_data_dir() + self.entry_points = ["bdfr"] + + def install(self): + if self.shell in ("all", "bash"): + comp_dir = self.share_dir + "/bash-completion/completions/" + for point in self.entry_points: + self.env[f"_{point.upper().replace('-', '_')}_COMPLETE"] = "bash_source" + with open(comp_dir + point, "w") as file: + file.write(subprocess.run([point], env=self.env, capture_output=True, text=True).stdout) + print(f"Bash completion for {point} written to {comp_dir}{point}") + if self.shell in ("all", "fish"): + comp_dir = self.share_dir + "/fish/vendor_completions.d/" + for point in self.entry_points: + self.env[f"_{point.upper().replace('-', '_')}_COMPLETE"] = "fish_source" + with open(comp_dir + point, "w") as file: + file.write(subprocess.run([point], env=self.env, capture_output=True, text=True).stdout) + print(f"Fish completion for {point} written to {comp_dir}{point}") + if self.shell in ("all", "zsh"): + comp_dir = self.share_dir + "/zsh/site-functions/" + for point in self.entry_points: + self.env[f"_{point.upper().replace('-', '_')}_COMPLETE"] = "zsh_source" + with open(comp_dir + point, "w") as file: + file.write(subprocess.run([point], env=self.env, capture_output=True, text=True).stdout) + print(f"Zsh completion for {point} written to {comp_dir}{point}") + + def uninstall(self): + if self.shell in ("all", "bash"): + comp_dir = self.share_dir + "/bash-completion/completions/" + for point in self.entry_points: + if os.path.exists(comp_dir + point): + os.remove(comp_dir + point) + print(f"Bash completion for {point} removed from {comp_dir}{point}") + if self.shell in ("all", "fish"): + comp_dir = self.share_dir + "/fish/vendor_completions.d/" + for point in self.entry_points: + if os.path.exists(comp_dir + point): + os.remove(comp_dir + point) + print(f"Fish completion for {point} removed from {comp_dir}{point}") + if self.shell in ("all", "zsh"): + comp_dir = self.share_dir + "/zsh/site-functions/" + for point in self.entry_points: + if os.path.exists(comp_dir + point): + os.remove(comp_dir + point) + print(f"Zsh completion for {point} removed from {comp_dir}{point}") From 
8c01a9e7a04bdbe759ee6f739a095b9b8329a462 Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Fri, 16 Dec 2022 23:45:36 -0500 Subject: [PATCH 50/76] Consolidate to pyproject Consolidates configs to pyproject.toml and updates workflows accordingly as well as sets sane minimums for dev requirements. adds version check to main script. --- .github/workflows/formatting_check.yml | 2 +- .github/workflows/publish.yml | 14 ++--- .github/workflows/test.yml | 10 ++-- README.md | 2 + bdfr/__init__.py | 1 + bdfr/__main__.py | 19 +++++++ bdfr/completion.py | 2 +- dev-requirements.txt | 5 -- docs/CONTRIBUTING.md | 2 +- pyproject.toml | 75 ++++++++++++++++++++++++++ pytest.ini | 7 --- requirements.txt | 9 ---- setup.cfg | 26 --------- setup.py | 6 --- tox.ini | 2 + 15 files changed, 114 insertions(+), 68 deletions(-) delete mode 100644 dev-requirements.txt delete mode 100644 pytest.ini delete mode 100644 requirements.txt delete mode 100644 setup.cfg delete mode 100644 setup.py diff --git a/.github/workflows/formatting_check.yml b/.github/workflows/formatting_check.yml index deb44d5..5a45479 100644 --- a/.github/workflows/formatting_check.yml +++ b/.github/workflows/formatting_check.yml @@ -10,4 +10,4 @@ jobs: - uses: actions/checkout@v3 - uses: paolorechia/pox@v1.0.1 with: - tox_env: "format_check" \ No newline at end of file + tox_env: "format_check" diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 6f15a00..589c201 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -11,25 +11,25 @@ jobs: deploy: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: '3.9' - name: Install dependencies run: | python -m pip install --upgrade pip - pip install setuptools wheel twine + pip install build setuptools wheel twine - name: Build and publish env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | - python setup.py sdist bdist_wheel + python -m build twine upload dist/* - - - name: Upload coverage report - uses: actions/upload-artifact@v2 + + - name: Upload dist folder + uses: actions/upload-artifact@v3 with: name: dist path: dist/ diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5aa8c61..0d52ef6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -19,16 +19,16 @@ jobs: python-version: 3.9 ext: .ps1 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip flake8 pytest pytest-cov - pip install -r requirements.txt + pip install . - name: Make configuration for tests env: @@ -43,9 +43,9 @@ jobs: - name: Test with pytest run: | pytest -m 'not slow' --verbose --cov=./bdfr/ --cov-report term:skip-covered --cov-report html - + - name: Upload coverage report - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: coverage_report path: htmlcov/ diff --git a/README.md b/README.md index 4b634ec..f732f6e 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,8 @@ python3 -m pipx install bdfr **To update BDFR**, run the above command again for pip or `pipx upgrade bdfr` for pipx installations. 
+**To check your version of BDFR**, run `bdfr --version` + **To install shell completions**, run `bdfr completions` ### AUR Package diff --git a/bdfr/__init__.py b/bdfr/__init__.py index e69de29..b482efe 100644 --- a/bdfr/__init__.py +++ b/bdfr/__init__.py @@ -0,0 +1 @@ +__version__ = "2.6.2" diff --git a/bdfr/__main__.py b/bdfr/__main__.py index d0c6664..57373c9 100644 --- a/bdfr/__main__.py +++ b/bdfr/__main__.py @@ -4,7 +4,9 @@ import logging import sys import click +import requests +from bdfr import __version__ from bdfr.archiver import Archiver from bdfr.cloner import RedditCloner from bdfr.completion import Completion @@ -74,8 +76,25 @@ def _add_options(opts: list): return wrap +def _check_version(context, param, value): + if not value or context.resilient_parsing: + return + current = __version__ + latest = requests.get("https://pypi.org/pypi/bdfr/json").json()["info"]["version"] + print(f"You are currently using v{current} the latest is v{latest}") + context.exit() + + @click.group() @click.help_option("-h", "--help") +@click.option( + "--version", + is_flag=True, + is_eager=True, + expose_value=False, + callback=_check_version, + help="Check version and exit.", +) def cli(): """BDFR is used to download and archive content from Reddit.""" pass diff --git a/bdfr/completion.py b/bdfr/completion.py index 1902319..8f4f122 100644 --- a/bdfr/completion.py +++ b/bdfr/completion.py @@ -12,7 +12,7 @@ class Completion: self.shell = shell self.env = os.environ.copy() self.share_dir = appdirs.user_data_dir() - self.entry_points = ["bdfr"] + self.entry_points = ["bdfr", "bdfr-archive", "bdfr-clone", "bdfr-download"] def install(self): if self.shell in ("all", "bash"): diff --git a/dev-requirements.txt b/dev-requirements.txt deleted file mode 100644 index 34597e7..0000000 --- a/dev-requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -black -isort -pre-commit -pytest -tox diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index ea2d37f..72666e7 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -61,7 +61,7 @@ bdfr There are additional Python packages that are required to develop the BDFR. 
These can be installed with the following command: ```bash -python3 -m pip install -r dev-requirements.txt +python3 -m pip install -e .[dev] ``` ### Tools diff --git a/pyproject.toml b/pyproject.toml index 4dced2f..23ae690 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,82 @@ +[build-system] +requires = ["setuptools>=65.6.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "bdfr" +description = "Downloads and archives content from reddit" +readme = "README.md" +requires-python = ">=3.9" +license = {file = "LICENSE"} +keywords = ["reddit", "download", "archive",] +authors = [{name = "Ali Parlakci", email = "parlakciali@gmail.com"}] +maintainers = [{name = "Serene Arc", email = "serenical@gmail.com"}] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "Natural Language :: English", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", +] +dependencies = [ + "appdirs>=1.4.4", + "beautifulsoup4>=4.10.0", + "click>=8.0.0", + "dict2xml>=1.7.0", + "praw>=7.2.0", + "pyyaml>=5.4.1", + "requests>=2.25.1", + "yt-dlp>=2022.11.11", +] +dynamic = ["version"] + +[tool.setuptools] +dynamic = {"version" = {attr = 'bdfr.__version__'}} +packages = ["bdfr", "bdfr.archive_entry", "bdfr.site_downloaders", "bdfr.site_downloaders.fallback_downloaders",] +data-files = {"config" = ["bdfr/default_config.cfg",]} + +[project.optional-dependencies] +dev = [ + "black>=22.10.0", + "isort>=5.10.1", + "pre-commit>=2.20.0", + "pytest>=7.1.0", + "tox>=3.27.1", +] + +[project.urls] +"Homepage" = "https://aliparlakci.github.io/bulk-downloader-for-reddit" +"Source" = "https://github.com/aliparlakci/bulk-downloader-for-reddit" +"Bug Reports" = "https://github.com/aliparlakci/bulk-downloader-for-reddit/issues" + +[project.scripts] +bdfr = "bdfr.__main__:cli" +bdfr-archive = "bdfr.__main__:cli_archive" +bdfr-clone = "bdfr.__main__:cli_clone" +bdfr-download = "bdfr.__main__:cli_download" + [tool.black] line-length = 120 [tool.isort] profile = "black" +py_version = 39 multi_line_output = 3 line_length = 120 +indent = 4 + +[tool.pytest.ini_options] +minversion = "7.1" +addopts = "--strict-markers" +testpaths = "tests" +markers = [ + "online: tests require a connection to the internet", + "reddit: tests require a connection to Reddit", + "slow: test is slow to run", + "authenticated: test requires an authenticated Reddit instance", + "testing: incomplete tests", +] diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index 09df53c..0000000 --- a/pytest.ini +++ /dev/null @@ -1,7 +0,0 @@ -[pytest] -addopts = --strict-markers -markers = - online: tests require a connection to the internet - reddit: tests require a connection to Reddit - slow: test is slow to run - authenticated: test requires an authenticated Reddit instance diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 62e6925..0000000 --- a/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -appdirs>=1.4.4 -bs4>=0.0.1 -click>=7.1.2 -dict2xml>=1.7.0 -ffmpeg-python>=0.2.0 -praw>=7.2.0 -pyyaml>=5.4.1 -requests>=2.25.1 -yt-dlp>=2022.11.11 diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index bb0ea60..0000000 --- a/setup.cfg +++ /dev/null @@ -1,26 +0,0 @@ -[metadata] -name = bdfr -description_file = README.md -description_content_type = 
text/markdown -home_page = https://github.com/aliparlakci/bulk-downloader-for-reddit -keywords = reddit, download, archive -version = 2.6.2 -author = Ali Parlakci -author_email = parlakciali@gmail.com -maintainer = Serene Arc -maintainer_email = serenical@gmail.com -license = GPLv3 -classifiers = - Programming Language :: Python :: 3 - License :: OSI Approved :: GNU General Public License v3 (GPLv3) - Natural Language :: English - Environment :: Console - Operating System :: OS Independent -platforms = any - -[files] -packages = bdfr - -[entry_points] -console_scripts = - bdfr = bdfr.__main__:cli diff --git a/setup.py b/setup.py deleted file mode 100644 index c5518a6..0000000 --- a/setup.py +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env python3 -# encoding=utf-8 - -from setuptools import setup - -setup(setup_requires=['pbr', 'appdirs'], pbr=True, data_files=[('config', ['bdfr/default_config.cfg'])], python_requires='>=3.9.0') diff --git a/tox.ini b/tox.ini index be451e6..01ece39 100644 --- a/tox.ini +++ b/tox.ini @@ -1,4 +1,6 @@ [tox] +requires = + tox>=3.27.1 envlist = format format_check From af6222e06c5f9b558ac391254844db6b28f4111f Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Sat, 17 Dec 2022 20:35:58 -0500 Subject: [PATCH 51/76] Cleanup tests Cleans up test args on new tests. Adds a log path to the default config test so as not to mangle the default log outside of tests. Matches setup functions to archive/clone. Removes a testing marker that was committed in error. --- pyproject.toml | 1 - .../test_archive_integration.py | 10 ++-------- tests/integration_tests/test_clone_integration.py | 10 ++-------- .../test_download_integration.py | 16 +++++----------- 4 files changed, 9 insertions(+), 28 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 23ae690..6879a6d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,5 +78,4 @@ markers = [ "reddit: tests require a connection to Reddit", "slow: test is slow to run", "authenticated: test requires an authenticated Reddit instance", - "testing: incomplete tests", ] diff --git a/tests/integration_tests/test_archive_integration.py b/tests/integration_tests/test_archive_integration.py index 1c0d30a..c5ad9fb 100644 --- a/tests/integration_tests/test_archive_integration.py +++ b/tests/integration_tests/test_archive_integration.py @@ -184,14 +184,8 @@ def test_cli_archive_soft_fail(test_args: list[str], tmp_path: Path): @pytest.mark.parametrize( ("test_args", "response"), ( - ( - ["--user", "nasa", "--submitted"], - 502, - ), - ( - ["--user", "nasa", "--submitted"], - 504, - ), + (["--user", "nasa", "--submitted"], 502), + (["--user", "nasa", "--submitted"], 504), ), ) def test_user_serv_fail(test_args: list[str], response: int, tmp_path: Path): diff --git a/tests/integration_tests/test_clone_integration.py b/tests/integration_tests/test_clone_integration.py index eb64364..cba4102 100644 --- a/tests/integration_tests/test_clone_integration.py +++ b/tests/integration_tests/test_clone_integration.py @@ -76,14 +76,8 @@ def test_cli_scrape_soft_fail(test_args: list[str], tmp_path: Path): @pytest.mark.parametrize( ("test_args", "response"), ( - ( - ["--user", "nasa", "--submitted"], - 502, - ), - ( - ["--user", "nasa", "--submitted"], - 504, - ), + (["--user", "nasa", "--submitted"], 502), + (["--user", "nasa", "--submitted"], 504), ), ) def test_user_serv_fail(test_args: list[str], response: int, tmp_path: Path): diff --git a/tests/integration_tests/test_download_integration.py
b/tests/integration_tests/test_download_integration.py index e44c95e..287e8d4 100644 --- a/tests/integration_tests/test_download_integration.py +++ b/tests/integration_tests/test_download_integration.py @@ -15,7 +15,7 @@ does_test_config_exist = Path("./tests/test_config.cfg").exists() def copy_test_config(run_path: Path): - shutil.copy(Path("./tests/test_config.cfg"), Path(run_path, "./test_config.cfg")) + shutil.copy(Path("./tests/test_config.cfg"), Path(run_path, "test_config.cfg")) def create_basic_args_for_download_runner(test_args: list[str], run_path: Path): @@ -25,7 +25,7 @@ def create_basic_args_for_download_runner(test_args: list[str], run_path: Path): str(run_path), "-v", "--config", - str(Path(run_path, "./test_config.cfg")), + str(Path(run_path, "test_config.cfg")), "--log", str(Path(run_path, "test_log.txt")), ] + test_args @@ -279,7 +279,7 @@ def test_cli_download_hard_fail(test_args: list[str], tmp_path: Path): def test_cli_download_use_default_config(tmp_path: Path): runner = CliRunner() - test_args = ["download", "-vv", str(tmp_path)] + test_args = ["download", "-vv", str(tmp_path), "--log", str(Path(tmp_path, "test_log.txt"))] result = runner.invoke(cli, test_args) assert result.exit_code == 0 @@ -404,14 +404,8 @@ def test_cli_download_score_filter(test_args: list[str], was_filtered: bool, tmp @pytest.mark.parametrize( ("test_args", "response"), ( - ( - ["--user", "nasa", "--submitted"], - 502, - ), - ( - ["--user", "nasa", "--submitted"], - 504, - ), + (["--user", "nasa", "--submitted"], 502), + (["--user", "nasa", "--submitted"], 504), ), ) def test_user_serv_fail(test_args: list[str], response: int, tmp_path: Path): From 603e7de04d9fe4d5717629b2606fe1b454615353 Mon Sep 17 00:00:00 2001 From: Soulsuck24 <79275800+Soulsuck24@users.noreply.github.com> Date: Mon, 19 Dec 2022 11:02:06 -0500 Subject: [PATCH 52/76] Redgifs fix Handle Redgifs links with a trailing /, which caused the ID to be extracted as an empty string. --- bdfr/site_downloaders/redgifs.py | 2 ++ tests/site_downloaders/test_redgifs.py | 1 + 2 files changed, 3 insertions(+) diff --git a/bdfr/site_downloaders/redgifs.py b/bdfr/site_downloaders/redgifs.py index 625cf7d..84fb3c3 100644 --- a/bdfr/site_downloaders/redgifs.py +++ b/bdfr/site_downloaders/redgifs.py @@ -24,6 +24,8 @@ class Redgifs(BaseDownloader): @staticmethod def _get_link(url: str) -> set[str]: try: + if url.endswith("/"): + url = url.removesuffix("/") redgif_id = re.match(r".*/(.*?)(\..{0,})?$", url).group(1) except AttributeError: raise SiteDownloaderError(f"Could not extract Redgifs ID from {url}") diff --git a/tests/site_downloaders/test_redgifs.py b/tests/site_downloaders/test_redgifs.py index 0e1a497..bfe683f 100644 --- a/tests/site_downloaders/test_redgifs.py +++ b/tests/site_downloaders/test_redgifs.py @@ -29,6 +29,7 @@ from bdfr.site_downloaders.redgifs import Redgifs "UnripeUnkemptWoodpecker-large.jpg", }, ), + ("https://www.redgifs.com/watch/genuineprivateguillemot/", {"GenuinePrivateGuillemot.mp4"}), ), ) def test_get_link(test_url: str, expected: set[str]): From 2e2dfe671b78b00c695957ee0ea1844afb5fe446 Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Mon, 19 Dec 2022 17:33:07 -0500 Subject: [PATCH 53/76] Fix fish/zsh completions Fixes a mistake in the fish/zsh completion file names: fish completions need a `.fish` suffix, and zsh completion functions need a leading underscore.
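As an illustration (not part of the patch itself), a minimal sketch of the naming convention the fix targets, assuming the `bdfr` entry point and a Linux-style data directory:

```python
# Illustrative sketch only: where each shell expects its completion file.
# share_dir is an assumption standing in for appdirs.user_data_dir().
from pathlib import Path

share_dir = Path.home() / ".local" / "share"
entry_point = "bdfr"

fish_file = share_dir / "fish/vendor_completions.d" / f"{entry_point}.fish"  # fish wants a .fish suffix
zsh_file = share_dir / "zsh/site-functions" / f"_{entry_point}"  # zsh autoloads "_<command>" files

print(fish_file)  # .../fish/vendor_completions.d/bdfr.fish
print(zsh_file)   # .../zsh/site-functions/_bdfr
```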
--- bdfr/completion.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/bdfr/completion.py b/bdfr/completion.py index 8f4f122..fac944f 100644 --- a/bdfr/completion.py +++ b/bdfr/completion.py @@ -26,16 +26,16 @@ class Completion: comp_dir = self.share_dir + "/fish/vendor_completions.d/" for point in self.entry_points: self.env[f"_{point.upper().replace('-', '_')}_COMPLETE"] = "fish_source" - with open(comp_dir + point, "w") as file: + with open(comp_dir + point + ".fish", "w") as file: file.write(subprocess.run([point], env=self.env, capture_output=True, text=True).stdout) - print(f"Fish completion for {point} written to {comp_dir}{point}") + print(f"Fish completion for {point} written to {comp_dir}{point}.fish") if self.shell in ("all", "zsh"): comp_dir = self.share_dir + "/zsh/site-functions/" for point in self.entry_points: self.env[f"_{point.upper().replace('-', '_')}_COMPLETE"] = "zsh_source" - with open(comp_dir + point, "w") as file: + with open(comp_dir + "_" + point, "w") as file: file.write(subprocess.run([point], env=self.env, capture_output=True, text=True).stdout) - print(f"Zsh completion for {point} written to {comp_dir}{point}") + print(f"Zsh completion for {point} written to {comp_dir}_{point}") def uninstall(self): if self.shell in ("all", "bash"): @@ -47,12 +47,12 @@ class Completion: if self.shell in ("all", "fish"): comp_dir = self.share_dir + "/fish/vendor_completions.d/" for point in self.entry_points: - if os.path.exists(comp_dir + point): - os.remove(comp_dir + point) - print(f"Fish completion for {point} removed from {comp_dir}{point}") + if os.path.exists(comp_dir + point + ".fish"): + os.remove(comp_dir + point + ".fish") + print(f"Fish completion for {point} removed from {comp_dir}{point}.fish") if self.shell in ("all", "zsh"): comp_dir = self.share_dir + "/zsh/site-functions/" for point in self.entry_points: - if os.path.exists(comp_dir + point): - os.remove(comp_dir + point) - print(f"Zsh completion for {point} removed from {comp_dir}{point}") + if os.path.exists(comp_dir + "_" + point): + os.remove(comp_dir + "_" + point) + print(f"Zsh completion for {point} removed from {comp_dir}_{point}") From 5d3a539eda7edf77c6339b1ee664f674c3884920 Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Mon, 19 Dec 2022 17:54:34 -0500 Subject: [PATCH 54/76] Fix install of completion if dirs missing Fixes situations where completion directories are missing and adds tests for the installer.
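For context, a minimal sketch of the directory guard this commit adds (one shell shown here; the patch repeats the same check for bash, fish, and zsh):

```python
# Sketch of the guard added before writing completion files; mirrors the
# bash branch of the patch. appdirs is the library the project already uses.
import os

import appdirs

comp_dir = appdirs.user_data_dir() + "/bash-completion/completions/"
if not os.path.exists(comp_dir):
    print("Creating Bash completion directory.")
    os.makedirs(comp_dir, exist_ok=True)  # exist_ok tolerates a concurrent create
```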
--- bdfr/completion.py | 9 ++++++++ tests/test_completion.py | 50 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 tests/test_completion.py diff --git a/bdfr/completion.py b/bdfr/completion.py index fac944f..43a9743 100644 --- a/bdfr/completion.py +++ b/bdfr/completion.py @@ -17,6 +17,9 @@ class Completion: def install(self): if self.shell in ("all", "bash"): comp_dir = self.share_dir + "/bash-completion/completions/" + if not os.path.exists(comp_dir): + print("Creating Bash completion directory.") + os.makedirs(comp_dir, exist_ok=True) for point in self.entry_points: self.env[f"_{point.upper().replace('-', '_')}_COMPLETE"] = "bash_source" with open(comp_dir + point, "w") as file: @@ -24,6 +27,9 @@ class Completion: print(f"Bash completion for {point} written to {comp_dir}{point}") if self.shell in ("all", "fish"): comp_dir = self.share_dir + "/fish/vendor_completions.d/" + if not os.path.exists(comp_dir): + print("Creating Fish completion directory.") + os.makedirs(comp_dir, exist_ok=True) for point in self.entry_points: self.env[f"_{point.upper().replace('-', '_')}_COMPLETE"] = "fish_source" with open(comp_dir + point + ".fish", "w") as file: @@ -31,6 +37,9 @@ class Completion: print(f"Fish completion for {point} written to {comp_dir}{point}.fish") if self.shell in ("all", "zsh"): comp_dir = self.share_dir + "/zsh/site-functions/" + if not os.path.exists(comp_dir): + print("Creating Zsh completion directory.") + os.makedirs(comp_dir, exist_ok=True) for point in self.entry_points: self.env[f"_{point.upper().replace('-', '_')}_COMPLETE"] = "zsh_source" with open(comp_dir + "_" + point, "w") as file: diff --git a/tests/test_completion.py b/tests/test_completion.py new file mode 100644 index 0000000..91f9fd2 --- /dev/null +++ b/tests/test_completion.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import sys +from pathlib import Path +from unittest.mock import patch + +import pytest + +from bdfr.completion import Completion + + +@pytest.mark.skipif(sys.platform == "win32", reason="Completions are not currently supported on Windows.") +def test_cli_completion_all(tmp_path: Path): + with patch("appdirs.user_data_dir", return_value=str(tmp_path)): + Completion("all").install() + assert Path.exists(Path(str(tmp_path) + "/bash-completion/completions/bdfr")) == 1 + assert Path.exists(Path(str(tmp_path) + "/fish/vendor_completions.d/bdfr.fish")) == 1 + assert Path.exists(Path(str(tmp_path) + "/zsh/site-functions/_bdfr")) == 1 + Completion("all").uninstall() + assert Path.exists(Path(str(tmp_path) + "/bash-completion/completions/bdfr")) == 0 + assert Path.exists(Path(str(tmp_path) + "/fish/vendor_completions.d/bdfr.fish")) == 0 + assert Path.exists(Path(str(tmp_path) + "/zsh/site-functions/_bdfr")) == 0 + + +@pytest.mark.skipif(sys.platform == "win32", reason="Completions are not currently supported on Windows.") +def test_cli_completion_bash(tmp_path: Path): + with patch("appdirs.user_data_dir", return_value=str(tmp_path)): + Completion("bash").install() + assert Path.exists(Path(str(tmp_path) + "/bash-completion/completions/bdfr")) == 1 + Completion("bash").uninstall() + assert Path.exists(Path(str(tmp_path) + "/bash-completion/completions/bdfr")) == 0 + + +@pytest.mark.skipif(sys.platform == "win32", reason="Completions are not currently supported on Windows.") +def test_cli_completion_fish(tmp_path: Path): + with patch("appdirs.user_data_dir", return_value=str(tmp_path)): + Completion("fish").install() + assert 
Path.exists(Path(str(tmp_path) + "/fish/vendor_completions.d/bdfr.fish")) == 1 + Completion("fish").uninstall() + assert Path.exists(Path(str(tmp_path) + "/fish/vendor_completions.d/bdfr.fish")) == 0 + + +@pytest.mark.skipif(sys.platform == "win32", reason="Completions are not currently supported on Windows.") +def test_cli_completion_zsh(tmp_path: Path): + with patch("appdirs.user_data_dir", return_value=str(tmp_path)): + Completion("zsh").install() + assert Path.exists(Path(str(tmp_path) + "/zsh/site-functions/_bdfr")) == 1 + Completion("zsh").uninstall() + assert Path.exists(Path(str(tmp_path) + "/zsh/site-functions/_bdfr")) == 0 From 83f45e7f60e7fd6ff5d1156af0ebf28904515ac8 Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Mon, 19 Dec 2022 18:32:37 -0500 Subject: [PATCH 55/76] Standardize shebang and coding declaration Standardizes shebang and coding declarations. Coding matches what's used by install tools such as pip(x). Removes a few init files that were not needed. --- bdfr/__init__.py | 3 +++ bdfr/__main__.py | 1 + bdfr/archive_entry/__init__.py | 2 -- bdfr/archive_entry/base_archive_entry.py | 2 +- bdfr/archive_entry/comment_archive_entry.py | 2 +- bdfr/archive_entry/submission_archive_entry.py | 2 +- bdfr/archiver.py | 2 +- bdfr/cloner.py | 2 +- bdfr/completion.py | 2 +- bdfr/configuration.py | 2 +- bdfr/connector.py | 2 +- bdfr/download_filter.py | 2 +- bdfr/downloader.py | 2 +- bdfr/exceptions.py | 3 ++- bdfr/file_name_formatter.py | 3 ++- bdfr/oauth2.py | 2 +- bdfr/resource.py | 2 +- bdfr/site_authenticator.py | 2 +- bdfr/site_downloaders/__init__.py | 0 bdfr/site_downloaders/base_downloader.py | 2 +- bdfr/site_downloaders/delay_for_reddit.py | 1 + bdfr/site_downloaders/direct.py | 1 + bdfr/site_downloaders/download_factory.py | 2 +- bdfr/site_downloaders/erome.py | 1 + bdfr/site_downloaders/fallback_downloaders/__init__.py | 0 .../fallback_downloaders/fallback_downloader.py | 2 +- bdfr/site_downloaders/fallback_downloaders/ytdlp_fallback.py | 2 +- bdfr/site_downloaders/gallery.py | 1 + bdfr/site_downloaders/gfycat.py | 1 + bdfr/site_downloaders/imgur.py | 1 + bdfr/site_downloaders/pornhub.py | 2 +- bdfr/site_downloaders/redgifs.py | 1 + bdfr/site_downloaders/self_post.py | 1 + bdfr/site_downloaders/vidble.py | 3 ++- bdfr/site_downloaders/vreddit.py | 1 + bdfr/site_downloaders/youtube.py | 1 + tests/__init__.py | 0 tests/archive_entry/__init__.py | 2 -- tests/archive_entry/test_comment_archive_entry.py | 2 +- tests/archive_entry/test_submission_archive_entry.py | 2 +- tests/conftest.py | 2 +- tests/integration_tests/__init__.py | 2 -- tests/integration_tests/test_archive_integration.py | 2 +- tests/integration_tests/test_clone_integration.py | 2 +- tests/integration_tests/test_download_integration.py | 2 +- tests/site_downloaders/__init__.py | 0 tests/site_downloaders/fallback_downloaders/__init__.py | 0 .../fallback_downloaders/test_ytdlp_fallback.py | 1 + tests/site_downloaders/test_delay_for_reddit.py | 2 +- tests/site_downloaders/test_direct.py | 2 +- tests/site_downloaders/test_download_factory.py | 2 +- tests/site_downloaders/test_erome.py | 3 ++- tests/site_downloaders/test_gallery.py | 2 +- tests/site_downloaders/test_gfycat.py | 2 +- tests/site_downloaders/test_imgur.py | 2 +- tests/site_downloaders/test_pornhub.py | 2 +- tests/site_downloaders/test_redgifs.py | 2 +- tests/site_downloaders/test_self_post.py | 2 +- tests/site_downloaders/test_vidble.py | 3 ++- tests/site_downloaders/test_vreddit.py | 2 +- 
tests/site_downloaders/test_youtube.py | 2 +- tests/test_archiver.py | 2 +- tests/test_configuration.py | 2 +- tests/test_connector.py | 3 ++- tests/test_download_filter.py | 2 +- tests/test_downloader.py | 2 +- tests/test_file_name_formatter.py | 4 ++-- tests/test_oauth2.py | 2 +- tests/test_resource.py | 2 +- 69 files changed, 70 insertions(+), 55 deletions(-) delete mode 100644 bdfr/archive_entry/__init__.py delete mode 100644 bdfr/site_downloaders/__init__.py delete mode 100644 bdfr/site_downloaders/fallback_downloaders/__init__.py delete mode 100644 tests/__init__.py delete mode 100644 tests/archive_entry/__init__.py delete mode 100644 tests/integration_tests/__init__.py delete mode 100644 tests/site_downloaders/__init__.py delete mode 100644 tests/site_downloaders/fallback_downloaders/__init__.py diff --git a/bdfr/__init__.py b/bdfr/__init__.py index b482efe..6bcee53 100644 --- a/bdfr/__init__.py +++ b/bdfr/__init__.py @@ -1 +1,4 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + __version__ = "2.6.2" diff --git a/bdfr/__main__.py b/bdfr/__main__.py index 57373c9..e35ba0a 100644 --- a/bdfr/__main__.py +++ b/bdfr/__main__.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# -*- coding: utf-8 -*- import logging import sys diff --git a/bdfr/archive_entry/__init__.py b/bdfr/archive_entry/__init__.py deleted file mode 100644 index d4c1799..0000000 --- a/bdfr/archive_entry/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env python3 -# coding=utf-8 diff --git a/bdfr/archive_entry/base_archive_entry.py b/bdfr/archive_entry/base_archive_entry.py index 49ea58a..3dea5e4 100644 --- a/bdfr/archive_entry/base_archive_entry.py +++ b/bdfr/archive_entry/base_archive_entry.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- from abc import ABC, abstractmethod from typing import Union diff --git a/bdfr/archive_entry/comment_archive_entry.py b/bdfr/archive_entry/comment_archive_entry.py index 1c72811..cc59373 100644 --- a/bdfr/archive_entry/comment_archive_entry.py +++ b/bdfr/archive_entry/comment_archive_entry.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import logging diff --git a/bdfr/archive_entry/submission_archive_entry.py b/bdfr/archive_entry/submission_archive_entry.py index 92f326e..38f1d34 100644 --- a/bdfr/archive_entry/submission_archive_entry.py +++ b/bdfr/archive_entry/submission_archive_entry.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import logging diff --git a/bdfr/archiver.py b/bdfr/archiver.py index e2ed33d..be5a445 100644 --- a/bdfr/archiver.py +++ b/bdfr/archiver.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import json import logging diff --git a/bdfr/cloner.py b/bdfr/cloner.py index e82cfaa..53108c0 100644 --- a/bdfr/cloner.py +++ b/bdfr/cloner.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import logging from time import sleep diff --git a/bdfr/completion.py b/bdfr/completion.py index 43a9743..9c7d6b2 100644 --- a/bdfr/completion.py +++ b/bdfr/completion.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import os import subprocess diff --git a/bdfr/configuration.py b/bdfr/configuration.py index a2a5310..0d00192 100644 --- a/bdfr/configuration.py +++ b/bdfr/configuration.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import logging from argparse import Namespace diff --git a/bdfr/connector.py b/bdfr/connector.py index 
e5d74a2..bf50f32 100644 --- a/bdfr/connector.py +++ b/bdfr/connector.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import configparser import importlib.resources diff --git a/bdfr/download_filter.py b/bdfr/download_filter.py index 9019cc9..0def316 100644 --- a/bdfr/download_filter.py +++ b/bdfr/download_filter.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import logging import re diff --git a/bdfr/downloader.py b/bdfr/downloader.py index 1cb6d46..7ad8a6b 100644 --- a/bdfr/downloader.py +++ b/bdfr/downloader.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import hashlib import logging.handlers diff --git a/bdfr/exceptions.py b/bdfr/exceptions.py index 1757cd9..e7e4415 100644 --- a/bdfr/exceptions.py +++ b/bdfr/exceptions.py @@ -1,4 +1,5 @@ -#!/usr/bin/env +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- class BulkDownloaderException(Exception): diff --git a/bdfr/file_name_formatter.py b/bdfr/file_name_formatter.py index 684c626..9ee481d 100644 --- a/bdfr/file_name_formatter.py +++ b/bdfr/file_name_formatter.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- + import datetime import logging import platform diff --git a/bdfr/oauth2.py b/bdfr/oauth2.py index 60f2169..28b956a 100644 --- a/bdfr/oauth2.py +++ b/bdfr/oauth2.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import configparser import logging diff --git a/bdfr/resource.py b/bdfr/resource.py index 0f5404c..bd3ae88 100644 --- a/bdfr/resource.py +++ b/bdfr/resource.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import hashlib import logging diff --git a/bdfr/site_authenticator.py b/bdfr/site_authenticator.py index bbf3b46..08b98e0 100644 --- a/bdfr/site_authenticator.py +++ b/bdfr/site_authenticator.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import configparser diff --git a/bdfr/site_downloaders/__init__.py b/bdfr/site_downloaders/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bdfr/site_downloaders/base_downloader.py b/bdfr/site_downloaders/base_downloader.py index f3ecec5..e4ac111 100644 --- a/bdfr/site_downloaders/base_downloader.py +++ b/bdfr/site_downloaders/base_downloader.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import logging from abc import ABC, abstractmethod diff --git a/bdfr/site_downloaders/delay_for_reddit.py b/bdfr/site_downloaders/delay_for_reddit.py index 3380731..40a7f9b 100644 --- a/bdfr/site_downloaders/delay_for_reddit.py +++ b/bdfr/site_downloaders/delay_for_reddit.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# -*- coding: utf-8 -*- import logging from typing import Optional diff --git a/bdfr/site_downloaders/direct.py b/bdfr/site_downloaders/direct.py index 4a6ac92..061ad7f 100644 --- a/bdfr/site_downloaders/direct.py +++ b/bdfr/site_downloaders/direct.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# -*- coding: utf-8 -*- from typing import Optional diff --git a/bdfr/site_downloaders/download_factory.py b/bdfr/site_downloaders/download_factory.py index 638316f..d44be21 100644 --- a/bdfr/site_downloaders/download_factory.py +++ b/bdfr/site_downloaders/download_factory.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import re import urllib.parse diff --git a/bdfr/site_downloaders/erome.py b/bdfr/site_downloaders/erome.py index 26469bc..bf139d2 100644 --- 
a/bdfr/site_downloaders/erome.py +++ b/bdfr/site_downloaders/erome.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# -*- coding: utf-8 -*- import logging import re diff --git a/bdfr/site_downloaders/fallback_downloaders/__init__.py b/bdfr/site_downloaders/fallback_downloaders/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bdfr/site_downloaders/fallback_downloaders/fallback_downloader.py b/bdfr/site_downloaders/fallback_downloaders/fallback_downloader.py index 3bc615d..124724a 100644 --- a/bdfr/site_downloaders/fallback_downloaders/fallback_downloader.py +++ b/bdfr/site_downloaders/fallback_downloaders/fallback_downloader.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- from abc import ABC, abstractmethod diff --git a/bdfr/site_downloaders/fallback_downloaders/ytdlp_fallback.py b/bdfr/site_downloaders/fallback_downloaders/ytdlp_fallback.py index 900c8e9..41f8474 100644 --- a/bdfr/site_downloaders/fallback_downloaders/ytdlp_fallback.py +++ b/bdfr/site_downloaders/fallback_downloaders/ytdlp_fallback.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import logging from typing import Optional diff --git a/bdfr/site_downloaders/gallery.py b/bdfr/site_downloaders/gallery.py index 278932f..6f00410 100644 --- a/bdfr/site_downloaders/gallery.py +++ b/bdfr/site_downloaders/gallery.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# -*- coding: utf-8 -*- import logging from typing import Optional diff --git a/bdfr/site_downloaders/gfycat.py b/bdfr/site_downloaders/gfycat.py index 7862d33..d7c60ca 100644 --- a/bdfr/site_downloaders/gfycat.py +++ b/bdfr/site_downloaders/gfycat.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# -*- coding: utf-8 -*- import json import re diff --git a/bdfr/site_downloaders/imgur.py b/bdfr/site_downloaders/imgur.py index 0b9ecdd..a4c378f 100644 --- a/bdfr/site_downloaders/imgur.py +++ b/bdfr/site_downloaders/imgur.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# -*- coding: utf-8 -*- import json import re diff --git a/bdfr/site_downloaders/pornhub.py b/bdfr/site_downloaders/pornhub.py index db37720..8ce4492 100644 --- a/bdfr/site_downloaders/pornhub.py +++ b/bdfr/site_downloaders/pornhub.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import logging from typing import Optional diff --git a/bdfr/site_downloaders/redgifs.py b/bdfr/site_downloaders/redgifs.py index 625cf7d..3144c22 100644 --- a/bdfr/site_downloaders/redgifs.py +++ b/bdfr/site_downloaders/redgifs.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# -*- coding: utf-8 -*- import json import re diff --git a/bdfr/site_downloaders/self_post.py b/bdfr/site_downloaders/self_post.py index 1b76b92..5719e59 100644 --- a/bdfr/site_downloaders/self_post.py +++ b/bdfr/site_downloaders/self_post.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# -*- coding: utf-8 -*- import logging from typing import Optional diff --git a/bdfr/site_downloaders/vidble.py b/bdfr/site_downloaders/vidble.py index a79ee25..aa1e949 100644 --- a/bdfr/site_downloaders/vidble.py +++ b/bdfr/site_downloaders/vidble.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- + import itertools import logging import re diff --git a/bdfr/site_downloaders/vreddit.py b/bdfr/site_downloaders/vreddit.py index a71d350..8f6022e 100644 --- a/bdfr/site_downloaders/vreddit.py +++ b/bdfr/site_downloaders/vreddit.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# -*- coding: utf-8 -*- import logging import tempfile diff --git 
a/bdfr/site_downloaders/youtube.py b/bdfr/site_downloaders/youtube.py index f4f8622..f0c0677 100644 --- a/bdfr/site_downloaders/youtube.py +++ b/bdfr/site_downloaders/youtube.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# -*- coding: utf-8 -*- import logging import tempfile diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/archive_entry/__init__.py b/tests/archive_entry/__init__.py deleted file mode 100644 index d4c1799..0000000 --- a/tests/archive_entry/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env python3 -# coding=utf-8 diff --git a/tests/archive_entry/test_comment_archive_entry.py b/tests/archive_entry/test_comment_archive_entry.py index 8e6f224..1895a89 100644 --- a/tests/archive_entry/test_comment_archive_entry.py +++ b/tests/archive_entry/test_comment_archive_entry.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import praw import pytest diff --git a/tests/archive_entry/test_submission_archive_entry.py b/tests/archive_entry/test_submission_archive_entry.py index 666eec3..8b83f1d 100644 --- a/tests/archive_entry/test_submission_archive_entry.py +++ b/tests/archive_entry/test_submission_archive_entry.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import praw import pytest diff --git a/tests/conftest.py b/tests/conftest.py index 3f871a3..77a26fb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import configparser import socket diff --git a/tests/integration_tests/__init__.py b/tests/integration_tests/__init__.py deleted file mode 100644 index d4c1799..0000000 --- a/tests/integration_tests/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env python3 -# coding=utf-8 diff --git a/tests/integration_tests/test_archive_integration.py b/tests/integration_tests/test_archive_integration.py index c5ad9fb..42689a8 100644 --- a/tests/integration_tests/test_archive_integration.py +++ b/tests/integration_tests/test_archive_integration.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import re import shutil diff --git a/tests/integration_tests/test_clone_integration.py b/tests/integration_tests/test_clone_integration.py index cba4102..60e4012 100644 --- a/tests/integration_tests/test_clone_integration.py +++ b/tests/integration_tests/test_clone_integration.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import shutil from pathlib import Path diff --git a/tests/integration_tests/test_download_integration.py b/tests/integration_tests/test_download_integration.py index 287e8d4..138ea61 100644 --- a/tests/integration_tests/test_download_integration.py +++ b/tests/integration_tests/test_download_integration.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import shutil from pathlib import Path diff --git a/tests/site_downloaders/__init__.py b/tests/site_downloaders/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/site_downloaders/fallback_downloaders/__init__.py b/tests/site_downloaders/fallback_downloaders/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/site_downloaders/fallback_downloaders/test_ytdlp_fallback.py b/tests/site_downloaders/fallback_downloaders/test_ytdlp_fallback.py index 9823d08..b735539 100644 --- a/tests/site_downloaders/fallback_downloaders/test_ytdlp_fallback.py +++ 
b/tests/site_downloaders/fallback_downloaders/test_ytdlp_fallback.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# -*- coding: utf-8 -*- from unittest.mock import MagicMock diff --git a/tests/site_downloaders/test_delay_for_reddit.py b/tests/site_downloaders/test_delay_for_reddit.py index 65d080c..045c022 100644 --- a/tests/site_downloaders/test_delay_for_reddit.py +++ b/tests/site_downloaders/test_delay_for_reddit.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- from unittest.mock import Mock diff --git a/tests/site_downloaders/test_direct.py b/tests/site_downloaders/test_direct.py index b652d9a..14190ee 100644 --- a/tests/site_downloaders/test_direct.py +++ b/tests/site_downloaders/test_direct.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- from unittest.mock import Mock diff --git a/tests/site_downloaders/test_download_factory.py b/tests/site_downloaders/test_download_factory.py index 581656d..2dc3a06 100644 --- a/tests/site_downloaders/test_download_factory.py +++ b/tests/site_downloaders/test_download_factory.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import praw import pytest diff --git a/tests/site_downloaders/test_erome.py b/tests/site_downloaders/test_erome.py index 1baeb66..ce32e88 100644 --- a/tests/site_downloaders/test_erome.py +++ b/tests/site_downloaders/test_erome.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- + import re from unittest.mock import MagicMock diff --git a/tests/site_downloaders/test_gallery.py b/tests/site_downloaders/test_gallery.py index 57d055b..c3cc86f 100644 --- a/tests/site_downloaders/test_gallery.py +++ b/tests/site_downloaders/test_gallery.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import praw import pytest diff --git a/tests/site_downloaders/test_gfycat.py b/tests/site_downloaders/test_gfycat.py index d436636..0cfb36f 100644 --- a/tests/site_downloaders/test_gfycat.py +++ b/tests/site_downloaders/test_gfycat.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- from unittest.mock import Mock diff --git a/tests/site_downloaders/test_imgur.py b/tests/site_downloaders/test_imgur.py index 6b49cd5..7f587c9 100644 --- a/tests/site_downloaders/test_imgur.py +++ b/tests/site_downloaders/test_imgur.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- from unittest.mock import Mock diff --git a/tests/site_downloaders/test_pornhub.py b/tests/site_downloaders/test_pornhub.py index 42ca5a0..d9971cb 100644 --- a/tests/site_downloaders/test_pornhub.py +++ b/tests/site_downloaders/test_pornhub.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- from unittest.mock import MagicMock diff --git a/tests/site_downloaders/test_redgifs.py b/tests/site_downloaders/test_redgifs.py index 0e1a497..6f5ce50 100644 --- a/tests/site_downloaders/test_redgifs.py +++ b/tests/site_downloaders/test_redgifs.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import re from unittest.mock import Mock diff --git a/tests/site_downloaders/test_self_post.py b/tests/site_downloaders/test_self_post.py index 104fb3b..9574b3c 100644 --- a/tests/site_downloaders/test_self_post.py +++ b/tests/site_downloaders/test_self_post.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import praw import pytest diff --git a/tests/site_downloaders/test_vidble.py 
b/tests/site_downloaders/test_vidble.py index 16b5a3b..41398e7 100644 --- a/tests/site_downloaders/test_vidble.py +++ b/tests/site_downloaders/test_vidble.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- + from unittest.mock import Mock import pytest diff --git a/tests/site_downloaders/test_vreddit.py b/tests/site_downloaders/test_vreddit.py index 6e79ba0..d5cc121 100644 --- a/tests/site_downloaders/test_vreddit.py +++ b/tests/site_downloaders/test_vreddit.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- from unittest.mock import MagicMock diff --git a/tests/site_downloaders/test_youtube.py b/tests/site_downloaders/test_youtube.py index 7a45a3c..3100215 100644 --- a/tests/site_downloaders/test_youtube.py +++ b/tests/site_downloaders/test_youtube.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- from unittest.mock import MagicMock diff --git a/tests/test_archiver.py b/tests/test_archiver.py index 932a2ab..cdd12d0 100644 --- a/tests/test_archiver.py +++ b/tests/test_archiver.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- from pathlib import Path from unittest.mock import MagicMock diff --git a/tests/test_configuration.py b/tests/test_configuration.py index 652c401..e7999b3 100644 --- a/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- from unittest.mock import MagicMock diff --git a/tests/test_connector.py b/tests/test_connector.py index 01b6a92..9681e4b 100644 --- a/tests/test_connector.py +++ b/tests/test_connector.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- + from datetime import datetime, timedelta from pathlib import Path from typing import Iterator diff --git a/tests/test_download_filter.py b/tests/test_download_filter.py index 07b7d67..3b4d6b8 100644 --- a/tests/test_download_filter.py +++ b/tests/test_download_filter.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- from unittest.mock import MagicMock diff --git a/tests/test_downloader.py b/tests/test_downloader.py index ba81b80..d7aa8dd 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import os import re diff --git a/tests/test_file_name_formatter.py b/tests/test_file_name_formatter.py index c04e07d..4964b3b 100644 --- a/tests/test_file_name_formatter.py +++ b/tests/test_file_name_formatter.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import platform import sys @@ -46,7 +46,7 @@ def do_test_path_equality(result: Path, expected: str) -> bool: expected = Path(*expected) else: expected = Path(expected) - return str(result).endswith(str(expected)) + return str(result).endswith(str(expected)) # noqa: FURB123 @pytest.fixture(scope="session") diff --git a/tests/test_oauth2.py b/tests/test_oauth2.py index 3014c37..123f750 100644 --- a/tests/test_oauth2.py +++ b/tests/test_oauth2.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import configparser from pathlib import Path diff --git a/tests/test_resource.py b/tests/test_resource.py index 146d9a0..e17d16a 100644 --- a/tests/test_resource.py +++ b/tests/test_resource.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- from unittest.mock import MagicMock From b3e477720667bbf5a39c21dc14b62852fd96465b 
Mon Sep 17 00:00:00 2001 From: OMEGA_RAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Mon, 19 Dec 2022 22:02:16 -0500 Subject: [PATCH 56/76] Update download_factory.py Attempt to fix #724. Narrows down the characters allowed in file extensions in the regex. Aside from the digits 3 and 4, the only extensions I can think of that this doesn't catch are bz2 and 7z (and 7z wasn't caught before either). --- bdfr/site_downloaders/download_factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bdfr/site_downloaders/download_factory.py b/bdfr/site_downloaders/download_factory.py index 638316f..1bc7507 100644 --- a/bdfr/site_downloaders/download_factory.py +++ b/bdfr/site_downloaders/download_factory.py @@ -30,7 +30,7 @@ class DownloadFactory: return Imgur elif re.match(r"(i\.)?(redgifs|gifdeliverynetwork)", sanitised_url): return Redgifs - elif re.match(r".*/.*\.\w{3,4}(\?[\w;&=]*)?$", sanitised_url) and not DownloadFactory.is_web_resource( + elif re.match(r".*/.*\.[a-zA-Z34]{3,4}(\?[\w;&=]*)?$", sanitised_url) and not DownloadFactory.is_web_resource( sanitised_url ): return Direct From da74096cdec9561593c04f1a39656c916ec10943 Mon Sep 17 00:00:00 2001 From: OMEGA_RAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Mon, 19 Dec 2022 22:04:49 -0500 Subject: [PATCH 57/76] Update test_download_factory.py --- tests/site_downloaders/test_download_factory.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/site_downloaders/test_download_factory.py b/tests/site_downloaders/test_download_factory.py index 581656d..b369486 100644 --- a/tests/site_downloaders/test_download_factory.py +++ b/tests/site_downloaders/test_download_factory.py @@ -65,6 +65,7 @@ def test_factory_lever_good(test_submission_url: str, expected_class: BaseDownlo "https://www.google.com", "https://www.google.com/test", "https://www.google.com/test/", + "https://www.tiktok.com/@keriberry.420", ), ) def test_factory_lever_bad(test_url: str): From 57ac0130a62eca0176e6d3a2bebe45f81a46558a Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Tue, 20 Dec 2022 12:16:31 -0500 Subject: [PATCH 58/76] Revert init files --- README.md | 4 +++- bdfr/archive_entry/__init__.py | 2 ++ bdfr/site_downloaders/__init__.py | 2 ++ bdfr/site_downloaders/fallback_downloaders/__init__.py | 2 ++ tests/__init__.py | 2 ++ tests/archive_entry/__init__.py | 2 ++ tests/integration_tests/__init__.py | 2 ++ tests/site_downloaders/__init__.py | 2 ++ tests/site_downloaders/fallback_downloaders/__init__.py | 2 ++ 9 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 bdfr/archive_entry/__init__.py create mode 100644 bdfr/site_downloaders/__init__.py create mode 100644 bdfr/site_downloaders/fallback_downloaders/__init__.py create mode 100644 tests/__init__.py create mode 100644 tests/archive_entry/__init__.py create mode 100644 tests/integration_tests/__init__.py create mode 100644 tests/site_downloaders/__init__.py create mode 100644 tests/site_downloaders/fallback_downloaders/__init__.py diff --git a/README.md b/README.md index f732f6e..120448e 100644 --- a/README.md +++ b/README.md @@ -115,7 +115,9 @@ would be equivalent to (take note that in YAML there is `file_scheme` instead of bdfr download ./path/to/output --skip mp4 --skip avi --file-scheme "{UPVOTES}_{REDDITOR}_{POSTID}_{DATE}" -L 10 -S top --subreddit EarthPorn --subreddit CityPorn ``` -In case when the same option is specified both in the YAML file and in as a command line argument, the command line argument takes prs +Any option that can be specified
multiple times should be formatted like subreddit is above. + +In the case where the same option is specified both in the YAML file and as a command line argument, the command line argument takes priority ## Options diff --git a/bdfr/archive_entry/__init__.py b/bdfr/archive_entry/__init__.py new file mode 100644 index 0000000..56fafa5 --- /dev/null +++ b/bdfr/archive_entry/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- diff --git a/bdfr/site_downloaders/__init__.py b/bdfr/site_downloaders/__init__.py new file mode 100644 index 0000000..56fafa5 --- /dev/null +++ b/bdfr/site_downloaders/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- diff --git a/bdfr/site_downloaders/fallback_downloaders/__init__.py b/bdfr/site_downloaders/fallback_downloaders/__init__.py new file mode 100644 index 0000000..56fafa5 --- /dev/null +++ b/bdfr/site_downloaders/fallback_downloaders/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..56fafa5 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- diff --git a/tests/archive_entry/__init__.py b/tests/archive_entry/__init__.py new file mode 100644 index 0000000..56fafa5 --- /dev/null +++ b/tests/archive_entry/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- diff --git a/tests/integration_tests/__init__.py b/tests/integration_tests/__init__.py new file mode 100644 index 0000000..56fafa5 --- /dev/null +++ b/tests/integration_tests/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- diff --git a/tests/site_downloaders/__init__.py b/tests/site_downloaders/__init__.py new file mode 100644 index 0000000..56fafa5 --- /dev/null +++ b/tests/site_downloaders/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- diff --git a/tests/site_downloaders/fallback_downloaders/__init__.py b/tests/site_downloaders/fallback_downloaders/__init__.py new file mode 100644 index 0000000..56fafa5 --- /dev/null +++ b/tests/site_downloaders/fallback_downloaders/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- From 2aea7d0d489299dd91cc4c335add67c177fcefcc Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Tue, 20 Dec 2022 13:05:50 -0500 Subject: [PATCH 59/76] Move completion to pathlib --- bdfr/completion.py | 29 +++++++++++++++-------------- tests/test_completion.py | 36 ++++++++++++++++++++---------------- 2 files changed, 35 insertions(+), 30 deletions(-) diff --git a/bdfr/completion.py b/bdfr/completion.py index 43a9743..668fb2a 100644 --- a/bdfr/completion.py +++ b/bdfr/completion.py @@ -1,8 +1,9 @@ #!/usr/bin/env python3 # coding=utf-8 -import os import subprocess +from os import environ +from pathlib import Path import appdirs @@ -10,16 +11,16 @@ import appdirs class Completion: def __init__(self, shell: str): self.shell = shell - self.env = os.environ.copy() + self.env = environ.copy() self.share_dir = appdirs.user_data_dir() self.entry_points = ["bdfr", "bdfr-archive", "bdfr-clone", "bdfr-download"] def install(self): if self.shell in ("all", "bash"): comp_dir = self.share_dir + "/bash-completion/completions/" - if not os.path.exists(comp_dir): + if not Path(comp_dir).exists(): print("Creating Bash completion directory.") - os.makedirs(comp_dir, exist_ok=True) + Path(comp_dir).mkdir(parents=True, exist_ok=True) for point in
self.entry_points: self.env[f"_{point.upper().replace('-', '_')}_COMPLETE"] = "bash_source" with open(comp_dir + point, "w") as file: @@ -27,9 +28,9 @@ class Completion: print(f"Bash completion for {point} written to {comp_dir}{point}") if self.shell in ("all", "fish"): comp_dir = self.share_dir + "/fish/vendor_completions.d/" - if not os.path.exists(comp_dir): + if not Path(comp_dir).exists(): print("Creating Fish completion directory.") - os.makedirs(comp_dir, exist_ok=True) + Path(comp_dir).mkdir(parents=True, exist_ok=True) for point in self.entry_points: self.env[f"_{point.upper().replace('-', '_')}_COMPLETE"] = "fish_source" with open(comp_dir + point + ".fish", "w") as file: @@ -37,9 +38,9 @@ class Completion: print(f"Fish completion for {point} written to {comp_dir}{point}.fish") if self.shell in ("all", "zsh"): comp_dir = self.share_dir + "/zsh/site-functions/" - if not os.path.exists(comp_dir): + if not Path(comp_dir).exists(): print("Creating Zsh completion directory.") - os.makedirs(comp_dir, exist_ok=True) + Path(comp_dir).mkdir(parents=True, exist_ok=True) for point in self.entry_points: self.env[f"_{point.upper().replace('-', '_')}_COMPLETE"] = "zsh_source" with open(comp_dir + "_" + point, "w") as file: @@ -50,18 +51,18 @@ class Completion: if self.shell in ("all", "bash"): comp_dir = self.share_dir + "/bash-completion/completions/" for point in self.entry_points: - if os.path.exists(comp_dir + point): - os.remove(comp_dir + point) + if Path(comp_dir + point).exists(): + Path(comp_dir + point).unlink() print(f"Bash completion for {point} removed from {comp_dir}{point}") if self.shell in ("all", "fish"): comp_dir = self.share_dir + "/fish/vendor_completions.d/" for point in self.entry_points: - if os.path.exists(comp_dir + point + ".fish"): - os.remove(comp_dir + point + ".fish") + if Path(comp_dir + point + ".fish").exists(): + Path(comp_dir + point + ".fish").unlink() print(f"Fish completion for {point} removed from {comp_dir}{point}.fish") if self.shell in ("all", "zsh"): comp_dir = self.share_dir + "/zsh/site-functions/" for point in self.entry_points: - if os.path.exists(comp_dir + "_" + point): - os.remove(comp_dir + "_" + point) + if Path(comp_dir + "_" + point).exists(): + Path(comp_dir + "_" + point).unlink() print(f"Zsh completion for {point} removed from {comp_dir}_{point}") diff --git a/tests/test_completion.py b/tests/test_completion.py index 91f9fd2..e29682a 100644 --- a/tests/test_completion.py +++ b/tests/test_completion.py @@ -12,39 +12,43 @@ from bdfr.completion import Completion @pytest.mark.skipif(sys.platform == "win32", reason="Completions are not currently supported on Windows.") def test_cli_completion_all(tmp_path: Path): - with patch("appdirs.user_data_dir", return_value=str(tmp_path)): + tmp_path = str(tmp_path) + with patch("appdirs.user_data_dir", return_value=tmp_path): Completion("all").install() - assert Path.exists(Path(str(tmp_path) + "/bash-completion/completions/bdfr")) == 1 - assert Path.exists(Path(str(tmp_path) + "/fish/vendor_completions.d/bdfr.fish")) == 1 - assert Path.exists(Path(str(tmp_path) + "/zsh/site-functions/_bdfr")) == 1 + assert Path(tmp_path + "/bash-completion/completions/bdfr").exists() == 1 + assert Path(tmp_path + "/fish/vendor_completions.d/bdfr.fish").exists() == 1 + assert Path(tmp_path + "/zsh/site-functions/_bdfr").exists() == 1 Completion("all").uninstall() - assert Path.exists(Path(str(tmp_path) + "/bash-completion/completions/bdfr")) == 0 - assert Path.exists(Path(str(tmp_path) + 
"/fish/vendor_completions.d/bdfr.fish")) == 0 - assert Path.exists(Path(str(tmp_path) + "/zsh/site-functions/_bdfr")) == 0 + assert Path(tmp_path + "/bash-completion/completions/bdfr").exists() == 0 + assert Path(tmp_path + "/fish/vendor_completions.d/bdfr.fish").exists() == 0 + assert Path(tmp_path + "/zsh/site-functions/_bdfr").exists() == 0 @pytest.mark.skipif(sys.platform == "win32", reason="Completions are not currently supported on Windows.") def test_cli_completion_bash(tmp_path: Path): - with patch("appdirs.user_data_dir", return_value=str(tmp_path)): + tmp_path = str(tmp_path) + with patch("appdirs.user_data_dir", return_value=tmp_path): Completion("bash").install() - assert Path.exists(Path(str(tmp_path) + "/bash-completion/completions/bdfr")) == 1 + assert Path(tmp_path + "/bash-completion/completions/bdfr").exists() == 1 Completion("bash").uninstall() - assert Path.exists(Path(str(tmp_path) + "/bash-completion/completions/bdfr")) == 0 + assert Path(tmp_path + "/bash-completion/completions/bdfr").exists() == 0 @pytest.mark.skipif(sys.platform == "win32", reason="Completions are not currently supported on Windows.") def test_cli_completion_fish(tmp_path: Path): - with patch("appdirs.user_data_dir", return_value=str(tmp_path)): + tmp_path = str(tmp_path) + with patch("appdirs.user_data_dir", return_value=tmp_path): Completion("fish").install() - assert Path.exists(Path(str(tmp_path) + "/fish/vendor_completions.d/bdfr.fish")) == 1 + assert Path(tmp_path + "/fish/vendor_completions.d/bdfr.fish").exists() == 1 Completion("fish").uninstall() - assert Path.exists(Path(str(tmp_path) + "/fish/vendor_completions.d/bdfr.fish")) == 0 + assert Path(tmp_path + "/fish/vendor_completions.d/bdfr.fish").exists() == 0 @pytest.mark.skipif(sys.platform == "win32", reason="Completions are not currently supported on Windows.") def test_cli_completion_zsh(tmp_path: Path): - with patch("appdirs.user_data_dir", return_value=str(tmp_path)): + tmp_path = str(tmp_path) + with patch("appdirs.user_data_dir", return_value=tmp_path): Completion("zsh").install() - assert Path.exists(Path(str(tmp_path) + "/zsh/site-functions/_bdfr")) == 1 + assert Path(tmp_path + "/zsh/site-functions/_bdfr").exists() == 1 Completion("zsh").uninstall() - assert Path.exists(Path(str(tmp_path) + "/zsh/site-functions/_bdfr")) == 0 + assert Path(tmp_path + "/zsh/site-functions/_bdfr").exists() == 0 From 7fef403757203af6116fa37ccdcdac273df2018b Mon Sep 17 00:00:00 2001 From: OMEGA_RAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Tue, 20 Dec 2022 13:32:43 -0500 Subject: [PATCH 60/76] Update completion.py --- bdfr/completion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bdfr/completion.py b/bdfr/completion.py index 668fb2a..7b38322 100644 --- a/bdfr/completion.py +++ b/bdfr/completion.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# coding=utf-8 +# -*- coding: utf-8 -*- import subprocess from os import environ From fe9cc7f29fe81806e342141390352fe24cbd4da6 Mon Sep 17 00:00:00 2001 From: Soulsuck24 <79275800+Soulsuck24@users.noreply.github.com> Date: Sat, 24 Dec 2022 20:52:45 -0500 Subject: [PATCH 61/76] Redgifs updates Update Redgifs regex for further edge case. Add test for checking ID. 
--- bdfr/site_downloaders/redgifs.py | 9 +++++++-- tests/site_downloaders/test_redgifs.py | 13 +++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/bdfr/site_downloaders/redgifs.py b/bdfr/site_downloaders/redgifs.py index 40d1466..95d23d2 100644 --- a/bdfr/site_downloaders/redgifs.py +++ b/bdfr/site_downloaders/redgifs.py @@ -23,13 +23,18 @@ class Redgifs(BaseDownloader): return [Resource(self.post, m, Resource.retry_download(m), None) for m in media_urls] @staticmethod - def _get_link(url: str) -> set[str]: + def _get_id(url: str) -> str: try: if url.endswith("/"): url = url.removesuffix("/") - redgif_id = re.match(r".*/(.*?)(\..{0,})?$", url).group(1) + redgif_id = re.match(r".*/(.*?)(?:\?.*|\..{0,})?$", url).group(1) except AttributeError: raise SiteDownloaderError(f"Could not extract Redgifs ID from {url}") + return redgif_id + + @staticmethod + def _get_link(url: str) -> set[str]: + redgif_id = Redgifs._get_id(url) auth_token = json.loads(Redgifs.retrieve_url("https://api.redgifs.com/v2/auth/temporary").text)["token"] if not auth_token: diff --git a/tests/site_downloaders/test_redgifs.py b/tests/site_downloaders/test_redgifs.py index 37e48e8..fd0e0ed 100644 --- a/tests/site_downloaders/test_redgifs.py +++ b/tests/site_downloaders/test_redgifs.py @@ -10,6 +10,19 @@ from bdfr.resource import Resource from bdfr.site_downloaders.redgifs import Redgifs +@pytest.mark.parametrize( + ("test_url", "expected"), + ( + ("https://redgifs.com/watch/frighteningvictorioussalamander", "frighteningvictorioussalamander"), + ("https://www.redgifs.com/watch/genuineprivateguillemot/", "genuineprivateguillemot"), + ("https://www.redgifs.com/watch/marriedcrushingcob?rel=u%3Akokiri.girl%3Bo%3Arecent", "marriedcrushingcob"), + ), +) +def test_get_id(test_url: str, expected: str): + result = Redgifs._get_id(test_url) + assert result == expected + + @pytest.mark.online @pytest.mark.parametrize( ("test_url", "expected"), From 13887ca7e1acd605426537a6043bf901aee55515 Mon Sep 17 00:00:00 2001 From: Soulsuck24 <79275800+Soulsuck24@users.noreply.github.com> Date: Mon, 26 Dec 2022 20:25:20 -0500 Subject: [PATCH 62/76] Redgif updates Coverage for direct links. The direct link itself won't work, because it will carry the wrong auth token anyway, but this will at least resolve to the right API call.
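A short sketch of the extra normalisation, using values from the added tests:

```python
# Thumbnail-style direct links (e.g. thumbs4.redgifs.com) carry mixed case
# and a "-mobile" suffix; both are stripped before the API lookup.
redgif_id = "DismalIgnorantDrongo-mobile".lower()
if redgif_id.endswith("-mobile"):
    redgif_id = redgif_id.removesuffix("-mobile")
assert redgif_id == "dismalignorantdrongo"
```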
--- bdfr/site_downloaders/redgifs.py | 4 +++- tests/site_downloaders/test_redgifs.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/bdfr/site_downloaders/redgifs.py b/bdfr/site_downloaders/redgifs.py index 95d23d2..205674a 100644 --- a/bdfr/site_downloaders/redgifs.py +++ b/bdfr/site_downloaders/redgifs.py @@ -27,7 +27,9 @@ class Redgifs(BaseDownloader): try: if url.endswith("/"): url = url.removesuffix("/") - redgif_id = re.match(r".*/(.*?)(?:\?.*|\..{0,})?$", url).group(1) + redgif_id = re.match(r".*/(.*?)(?:\?.*|\..{0,})?$", url).group(1).lower() + if redgif_id.endswith("-mobile"): + redgif_id = redgif_id.removesuffix("-mobile") except AttributeError: raise SiteDownloaderError(f"Could not extract Redgifs ID from {url}") return redgif_id diff --git a/tests/site_downloaders/test_redgifs.py b/tests/site_downloaders/test_redgifs.py index fd0e0ed..5f4f6fc 100644 --- a/tests/site_downloaders/test_redgifs.py +++ b/tests/site_downloaders/test_redgifs.py @@ -16,6 +16,8 @@ from bdfr.site_downloaders.redgifs import Redgifs ("https://redgifs.com/watch/frighteningvictorioussalamander", "frighteningvictorioussalamander"), ("https://www.redgifs.com/watch/genuineprivateguillemot/", "genuineprivateguillemot"), ("https://www.redgifs.com/watch/marriedcrushingcob?rel=u%3Akokiri.girl%3Bo%3Arecent", "marriedcrushingcob"), + ("https://thumbs4.redgifs.com/DismalIgnorantDrongo.mp4", "dismalignorantdrongo"), + ("https://thumbs4.redgifs.com/DismalIgnorantDrongo-mobile.mp4", "dismalignorantdrongo"), ), ) def test_get_id(test_url: str, expected: str): From 2bafb1b99b552e2817484adea21cab087077dda0 Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Wed, 28 Dec 2022 10:00:43 -0500 Subject: [PATCH 63/76] Consolidate flake8 settings Consolidates sane flake8 settings to pyproject with the Flake8-pyproject plugin. Does not change the logic of the test workflow, but allows base settings to live in pyproject for anyone using flake8 as an external linter (e.g. vscode). Also fixes some flake8 errors that were not being picked up by current testing, mostly unused imports. --- .github/workflows/test.yml | 8 ++++++-- README.md | 8 ++++++-- bdfr/connector.py | 4 ++-- bdfr/site_downloaders/imgur.py | 2 +- bdfr/site_downloaders/vreddit.py | 7 ++----- pyproject.toml | 6 ++++++ tests/test_download_filter.py | 2 +- 7 files changed, 24 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0d52ef6..ca32bb3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -3,8 +3,12 @@ name: Python Test on: push: branches: [ master, development ] + paths-ignore: + - "*.md" pull_request: branches: [ master, development ] + paths-ignore: + - "*.md" jobs: test: @@ -27,7 +31,7 @@ jobs: - name: Install dependencies run: | - python -m pip install --upgrade pip flake8 pytest pytest-cov + python -m pip install --upgrade pip Flake8-pyproject pytest pytest-cov pip install . - name: Make configuration for tests @@ -38,7 +42,7 @@ jobs: - name: Lint with flake8 run: | - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + flake8 .
--select=E9,F63,F7,F82 - name: Test with pytest run: | diff --git a/README.md b/README.md index 120448e..dd700ad 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,12 @@ # Bulk Downloader for Reddit -[![PyPI version](https://img.shields.io/pypi/v/bdfr.svg)](https://pypi.python.org/pypi/bdfr) -[![PyPI downloads](https://img.shields.io/pypi/dm/bdfr)](https://pypi.python.org/pypi/bdfr) +[![PyPI Status](https://img.shields.io/pypi/status/bdfr?logo=PyPI)](https://pypi.python.org/pypi/bdfr) +[![PyPI version](https://img.shields.io/pypi/v/bdfr.svg?logo=PyPI)](https://pypi.python.org/pypi/bdfr) +[![PyPI downloads](https://img.shields.io/pypi/dm/bdfr?logo=PyPI)](https://pypi.python.org/pypi/bdfr) +[![AUR version](https://img.shields.io/aur/version/python-bdfr?logo=Arch%20Linux)](https://aur.archlinux.org/packages/python-bdfr) [![Python Test](https://github.com/aliparlakci/bulk-downloader-for-reddit/actions/workflows/test.yml/badge.svg?branch=master)](https://github.com/aliparlakci/bulk-downloader-for-reddit/actions/workflows/test.yml) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg?logo=Python)](https://github.com/psf/black) +[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit) This is a tool to download submissions or submission data from Reddit. It can be used to archive data or even crawl Reddit to gather research data. The BDFR is flexible and can be used in scripts if needed through an extensive command-line interface. [List of currently supported sources](#list-of-currently-supported-sources) diff --git a/bdfr/connector.py b/bdfr/connector.py index bf50f32..d0c4ac2 100644 --- a/bdfr/connector.py +++ b/bdfr/connector.py @@ -205,7 +205,7 @@ class RedditConnector(metaclass=ABCMeta): else: log_path = Path(self.args.log).resolve().expanduser() if not log_path.parent.exists(): - raise errors.BulkDownloaderException(f"Designated location for logfile does not exist") + raise errors.BulkDownloaderException("Designated location for logfile does not exist") backup_count = self.cfg_parser.getint("DEFAULT", "backup_log_count", fallback=3) file_handler = logging.handlers.RotatingFileHandler( log_path, @@ -323,7 +323,7 @@ class RedditConnector(metaclass=ABCMeta): def get_multireddits(self) -> list[Iterator]: if self.args.multireddit: if len(self.args.user) != 1: - logger.error(f"Only 1 user can be supplied when retrieving from multireddits") + logger.error("Only 1 user can be supplied when retrieving from multireddits") return [] out = [] for multi in self.split_args_input(self.args.multireddit): diff --git a/bdfr/site_downloaders/imgur.py b/bdfr/site_downloaders/imgur.py index a4c378f..537ab64 100644 --- a/bdfr/site_downloaders/imgur.py +++ b/bdfr/site_downloaders/imgur.py @@ -67,7 +67,7 @@ class Imgur(BaseDownloader): image_dict = re.search(outer_regex, chosen_script).group(1) image_dict = re.search(inner_regex, image_dict).group(1) except AttributeError: - raise SiteDownloaderError(f"Could not find image dictionary in page source") + raise SiteDownloaderError("Could not find image dictionary in page source") try: image_dict = json.loads(image_dict) diff --git a/bdfr/site_downloaders/vreddit.py b/bdfr/site_downloaders/vreddit.py index 8f6022e..48f5ba1 100644 --- a/bdfr/site_downloaders/vreddit.py +++ b/bdfr/site_downloaders/vreddit.py @@ -2,14 +2,11 @@ # -*- coding: utf-8 -*- import logging -import tempfile -from pathlib import Path -from typing import Callable, Optional +from typing import 
Optional -import yt_dlp from praw.models import Submission -from bdfr.exceptions import NotADownloadableLinkError, SiteDownloaderError +from bdfr.exceptions import NotADownloadableLinkError from bdfr.resource import Resource from bdfr.site_authenticator import SiteAuthenticator from bdfr.site_downloaders.youtube import Youtube diff --git a/pyproject.toml b/pyproject.toml index 6879a6d..a8b0dd2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,6 +62,12 @@ bdfr-download = "bdfr.__main__:cli_download" [tool.black] line-length = 120 +[tool.flake8] +exclude = ["scripts"] +max-line-length = 120 +show-source = true +statistics = true + [tool.isort] profile = "black" py_version = 39 diff --git a/tests/test_download_filter.py b/tests/test_download_filter.py index 3b4d6b8..6062dc3 100644 --- a/tests/test_download_filter.py +++ b/tests/test_download_filter.py @@ -76,4 +76,4 @@ def test_filter_empty_filter(test_url: str): download_filter = DownloadFilter() test_resource = Resource(MagicMock(), test_url, lambda: None) result = download_filter.check_resource(test_resource) - assert result is True + assert result From 874c7e3117450f5ca477ee80b048653b5360df35 Mon Sep 17 00:00:00 2001 From: Soulsuck24 <79275800+Soulsuck24@users.noreply.github.com> Date: Sat, 31 Dec 2022 08:53:13 -0500 Subject: [PATCH 64/76] Redgif fixes Missing half of #733 --- bdfr/site_downloaders/download_factory.py | 2 +- tests/site_downloaders/test_download_factory.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/bdfr/site_downloaders/download_factory.py b/bdfr/site_downloaders/download_factory.py index df2a0ea..6237ecd 100644 --- a/bdfr/site_downloaders/download_factory.py +++ b/bdfr/site_downloaders/download_factory.py @@ -28,7 +28,7 @@ class DownloadFactory: sanitised_url = DownloadFactory.sanitise_url(url) if re.match(r"(i\.|m\.)?imgur", sanitised_url): return Imgur - elif re.match(r"(i\.)?(redgifs|gifdeliverynetwork)", sanitised_url): + elif re.match(r"(i\.|thumbs\d\.)?(redgifs|gifdeliverynetwork)", sanitised_url): return Redgifs elif re.match(r".*/.*\.[a-zA-Z34]{3,4}(\?[\w;&=]*)?$", sanitised_url) and not DownloadFactory.is_web_resource( sanitised_url diff --git a/tests/site_downloaders/test_download_factory.py b/tests/site_downloaders/test_download_factory.py index bb09471..062635c 100644 --- a/tests/site_downloaders/test_download_factory.py +++ b/tests/site_downloaders/test_download_factory.py @@ -40,6 +40,7 @@ from bdfr.site_downloaders.youtube import Youtube ("https://youtube.com/watch?v=Gv8Wz74FjVA", Youtube), ("https://redgifs.com/watch/courageousimpeccablecanvasback", Redgifs), ("https://www.gifdeliverynetwork.com/repulsivefinishedandalusianhorse", Redgifs), + ("https://thumbs4.redgifs.com/DismalIgnorantDrongo-mobile.mp4", Redgifs), ("https://youtu.be/DevfjHOhuFc", Youtube), ("https://m.youtube.com/watch?v=kr-FeojxzUM", Youtube), ("https://dynasty-scans.com/system/images_images/000/017/819/original/80215103_p0.png?1612232781", Direct), From c4bece2f5844e41e4771c7a95b857474ac0ab030 Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Sat, 31 Dec 2022 09:09:48 -0500 Subject: [PATCH 65/76] Add flake8 to precommit adds flake8 to pre-commit and dev requirements. 
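With the hook added below, the checks run on every commit; contributors can also drive them by hand with the standard pre-commit commands (normal pre-commit usage, not something this patch adds):

    pre-commit install                 # register the git hook once per clone
    pre-commit run flake8 --all-files  # run just the flake8 hook across the whole tree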
---
 .pre-commit-config.yaml | 6 ++++++
 docs/CONTRIBUTING.md    | 3 ++-
 pyproject.toml          | 1 +
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7013228..add4ea6 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -13,6 +13,12 @@ repos:
       - id: isort
         name: isort (python)
 
+  - repo: https://github.com/pycqa/flake8
+    rev: 6.0.0
+    hooks:
+      - id: flake8
+        additional_dependencies: [Flake8-pyproject]
+
   - repo: https://github.com/markdownlint/markdownlint
     rev: v0.12.0
     hooks:
diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md
index 72666e7..53fcb03 100644
--- a/docs/CONTRIBUTING.md
+++ b/docs/CONTRIBUTING.md
@@ -69,12 +69,13 @@ python3 -m pip install -e .[dev]
 The BDFR project uses several tools to manage the code of the project. These include:
 
 - [black](https://github.com/psf/black)
+- [flake8](https://github.com/john-hen/Flake8-pyproject)
 - [isort](https://github.com/PyCQA/isort)
 - [markdownlint (mdl)](https://github.com/markdownlint/markdownlint)
 - [tox](https://tox.wiki/en/latest/)
 - [pre-commit](https://github.com/pre-commit/pre-commit)
 
-The first three tools are formatters. These change the code to the standards expected for the BDFR project. The configuration details for these tools are contained in the [pyproject.toml](../pyproject.toml) file for the project.
+The first four tools are formatters and linters. These change or check the code against the standards expected for the BDFR project. The configuration details for these tools are contained in the [pyproject.toml](../pyproject.toml) file for the project.
 
 The tool `tox` is used to run tests and tools on demand and has the following environments:
diff --git a/pyproject.toml b/pyproject.toml
index a8b0dd2..4a28a8f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,6 +42,7 @@ data-files = {"config" = ["bdfr/default_config.cfg",]}
 [project.optional-dependencies]
 dev = [
     "black>=22.10.0",
+    "Flake8-pyproject>=1.2.2",
     "isort>=5.10.1",
     "pre-commit>=2.20.0",
     "pytest>=7.1.0",

From b6edc367532a42cd91161450c8deb829e4d3780c Mon Sep 17 00:00:00 2001
From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com>
Date: Sun, 1 Jan 2023 03:46:08 -0500
Subject: [PATCH 66/76] Update connector for 7-digit IDs

---
 bdfr/connector.py       | 2 +-
 tests/test_connector.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/bdfr/connector.py b/bdfr/connector.py
index bf50f32..1583e37 100644
--- a/bdfr/connector.py
+++ b/bdfr/connector.py
@@ -301,7 +301,7 @@ class RedditConnector(metaclass=ABCMeta):
     def get_submissions_from_link(self) -> list[list[praw.models.Submission]]:
         supplied_submissions = []
         for sub_id in self.args.link:
-            if len(sub_id) == 6:
+            if len(sub_id) in (6, 7):
                 supplied_submissions.append(self.reddit_instance.submission(id=sub_id))
             else:
                 supplied_submissions.append(self.reddit_instance.submission(url=sub_id))
diff --git a/tests/test_connector.py b/tests/test_connector.py
index 9681e4b..bf781e2 100644
--- a/tests/test_connector.py
+++ b/tests/test_connector.py
@@ -167,6 +167,7 @@ def test_create_authenticator(downloader_mock: MagicMock):
         ("lvpf4l",),
         ("lvpf4l", "lvqnsn"),
         ("lvpf4l", "lvqnsn", "lvl9kd"),
+        ("1000000",),
     ),
 )
 def test_get_submissions_from_link(
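The widened length check works because Reddit submission IDs are base36 strings, and submission counts have crossed the six-character boundary; the new "1000000" test case sits exactly on it. A quick illustrative check (not code from the patch):

    # Base36 boundary between 6- and 7-character submission IDs
    assert int("zzzzzz", 36) == 2176782335   # largest 6-character ID
    assert int("1000000", 36) == 2176782336  # first 7-character ID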
From 3fdaf353068b026468186ad8f57ac403c4767760 Mon Sep 17 00:00:00 2001
From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com>
Date: Sun, 1 Jan 2023 08:59:34 -0500
Subject: [PATCH 67/76] Update black/isort

---
 .pre-commit-config.yaml | 4 ++--
 pyproject.toml          | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index add4ea6..28bd140 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -3,12 +3,12 @@ repos:
 
   - repo: https://github.com/psf/black
-    rev: 22.10.0
+    rev: 22.12.0
     hooks:
       - id: black
 
   - repo: https://github.com/pycqa/isort
-    rev: 5.10.1
+    rev: 5.11.4
     hooks:
       - id: isort
         name: isort (python)
diff --git a/pyproject.toml b/pyproject.toml
index 4a28a8f..c88008d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,9 +41,9 @@ data-files = {"config" = ["bdfr/default_config.cfg",]}
 [project.optional-dependencies]
 dev = [
-    "black>=22.10.0",
+    "black>=22.12.0",
     "Flake8-pyproject>=1.2.2",
-    "isort>=5.10.1",
+    "isort>=5.11.4",
     "pre-commit>=2.20.0",
     "pytest>=7.1.0",
     "tox>=3.27.1",

From aced16456027f48427cd348fe8eb3bd8ba02d199 Mon Sep 17 00:00:00 2001
From: OMEGA_RAZER <869111+OMEGARAZER@users.noreply.github.com>
Date: Sun, 1 Jan 2023 23:11:43 -0500
Subject: [PATCH 68/76] Update test.yml

Corrects the .md ignore glob and also ignores the markdownlint
configuration files, since there is no need to trigger the Python tests
from changes to them.
---
 .github/workflows/test.yml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index ca32bb3..927d70a 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -4,11 +4,15 @@ on:
   push:
     branches: [ master, development ]
     paths-ignore:
-      - "*.md"
+      - "**.md"
+      - ".markdown_style.rb"
+      - ".mdlrc"
   pull_request:
     branches: [ master, development ]
     paths-ignore:
-      - "*.md"
+      - "**.md"
+      - ".markdown_style.rb"
+      - ".mdlrc"
 
 jobs:
   test:

From e0e780a272a27ba22149af54664b4e214517cbb0 Mon Sep 17 00:00:00 2001
From: Brian
Date: Mon, 2 Jan 2023 16:17:52 -0700
Subject: [PATCH 69/76] Update README.md to include information from #549

Serene-Arc determined that the time filter does not apply if the Reddit
API is not providing the "Top" or "Controversial" list. As such, I have
updated the time option documentation to clarify that.
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index dd700ad..a174d80 100644
--- a/README.md
+++ b/README.md
@@ -197,6 +197,7 @@ The following options are common between both the `archive` and `download` comma
 - `-t, --time`
   - This is the time filter that will be applied to all applicable sources
   - This option does not apply to upvoted or saved posts when scraping from these sources
+  - This option only applies if sorting by top or controversial. See `--sort` for more detail.
   - The following options are available:
     - `all` (default)
     - `hour`

From f40ac35f4af9f26223cc12eb78a1f10c8ff83a1f Mon Sep 17 00:00:00 2001
From: Serene-Arc
Date: Tue, 3 Jan 2023 19:16:23 +1000
Subject: [PATCH 70/76] Add option to determine restriction scheme

---
 bdfr/file_name_formatter.py       | 15 ++++++++++++---
 tests/test_file_name_formatter.py | 24 ++++++++++++++++++++++--
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/bdfr/file_name_formatter.py b/bdfr/file_name_formatter.py
index 9ee481d..0330336 100644
--- a/bdfr/file_name_formatter.py
+++ b/bdfr/file_name_formatter.py
@@ -28,12 +28,19 @@ class FileNameFormatter:
         "upvotes",
     )
 
-    def __init__(self, file_format_string: str, directory_format_string: str, time_format_string: str):
+    def __init__(
+        self,
+        file_format_string: str,
+        directory_format_string: str,
+        time_format_string: str,
+        restriction_scheme: Optional[str] = None,
+    ):
         if not self.validate_string(file_format_string):
             raise BulkDownloaderException(f'"{file_format_string}" is not a valid format string')
         self.file_format_string = file_format_string
         self.directory_format_string: list[str] = directory_format_string.split("/")
         self.time_format_string = time_format_string
+        self.restriction_scheme = restriction_scheme.lower().strip() if restriction_scheme else None
 
     def _format_name(self, submission: Union[Comment, Submission], format_string: str) -> str:
         if isinstance(submission, Submission):
@@ -52,9 +59,11 @@ class FileNameFormatter:
 
         result = result.replace("/", "")
 
-        if platform.system() == "Windows":
+        if self.restriction_scheme is None:
+            if platform.system() == "Windows":
+                result = FileNameFormatter._format_for_windows(result)
+        elif self.restriction_scheme == "windows":
             result = FileNameFormatter._format_for_windows(result)
-
         return result
 
     @staticmethod
diff --git a/tests/test_file_name_formatter.py b/tests/test_file_name_formatter.py
index 4964b3b..5f94e5f 100644
--- a/tests/test_file_name_formatter.py
+++ b/tests/test_file_name_formatter.py
@@ -33,6 +33,10 @@ def submission() -> MagicMock:
     return test
 
 
+def check_valid_windows_path(test_string: str):
+    return test_string == FileNameFormatter._format_for_windows(test_string)
+
+
 def do_test_string_equality(result: Union[Path, str], expected: str) -> bool:
     if platform.system() == "Windows":
         expected = FileNameFormatter._format_for_windows(expected)
@@ -91,6 +95,15 @@ def test_check_format_string_validity(test_string: str, expected: bool):
 
 @pytest.mark.online
 @pytest.mark.reddit
+@pytest.mark.parametrize(
+    "restriction_scheme",
+    (
+        "windows",
+        "linux",
+        "bla",
+        None,
+    ),
+)
 @pytest.mark.parametrize(
     ("test_format_string", "expected"),
     (
@@ -102,10 +115,17 @@
         ("{REDDITOR}_{TITLE}_{POSTID}", "Kirsty-Blue_George Russel acknowledges the Twitter trend about him_w22m5l"),
     ),
 )
-def test_format_name_real(test_format_string: str, expected: str, reddit_submission: praw.models.Submission):
-    test_formatter = FileNameFormatter(test_format_string, "", "")
+def test_format_name_real(
+    test_format_string: str,
+    expected: str,
+    reddit_submission: praw.models.Submission,
+    restriction_scheme: Optional[str],
+):
+    test_formatter = FileNameFormatter(test_format_string, "", "", restriction_scheme)
     result = test_formatter._format_name(reddit_submission, test_format_string)
     assert do_test_string_equality(result, expected)
+    if restriction_scheme == "windows":
+        assert check_valid_windows_path(result)
 
 
 @pytest.mark.online

From 4f07e92c5ee47d38941815d2fe48d90de884e287 Mon Sep 17 00:00:00 2001
From: Serene-Arc
Date: Wed, 4 Jan 2023 19:04:31 +1000
Subject: [PATCH 71/76] Add option to classes

---
 bdfr/__main__.py      | 3 ++-
 bdfr/configuration.py | 1 +
 bdfr/connector.py     | 4 ++++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/bdfr/__main__.py b/bdfr/__main__.py
index e35ba0a..2823ce1 100644
--- a/bdfr/__main__.py
+++ b/bdfr/__main__.py
@@ -20,15 +20,16 @@ _common_options = [
     click.argument("directory", type=str),
     click.option("--authenticate", is_flag=True, default=None),
     click.option("--config", type=str, default=None),
-    click.option("--opts", type=str, default=None),
     click.option("--disable-module", multiple=True, default=None, type=str),
     click.option("--exclude-id", default=None, multiple=True),
     click.option("--exclude-id-file", default=None, multiple=True),
     click.option("--file-scheme", default=None, type=str),
+    click.option("--filename-restriction-scheme", type=click.Choice(("linux", "windows")), default=None),
     click.option("--folder-scheme", default=None, type=str),
     click.option("--ignore-user", type=str, multiple=True, default=None),
     click.option("--include-id-file", multiple=True, default=None),
     click.option("--log", type=str, default=None),
+    click.option("--opts", type=str, default=None),
     click.option("--saved", is_flag=True, default=None),
     click.option("--search", default=None, type=str),
     click.option("--submitted", is_flag=True, default=None),
diff --git a/bdfr/configuration.py b/bdfr/configuration.py
index 0d00192..78ae12e 100644
--- a/bdfr/configuration.py
+++ b/bdfr/configuration.py
@@ -23,6 +23,7 @@ class Configuration(Namespace):
         self.exclude_id = []
         self.exclude_id_file = []
         self.file_scheme: str = "{REDDITOR}_{TITLE}_{POSTID}"
+        self.filename_restriction_scheme = None
         self.folder_scheme: str = "{SUBREDDIT}"
         self.ignore_user = []
         self.include_id_file = []
diff --git a/bdfr/connector.py b/bdfr/connector.py
index 6d7bc64..8fe149a 100644
--- a/bdfr/connector.py
+++ b/bdfr/connector.py
@@ -107,6 +107,10 @@ class RedditConnector(metaclass=ABCMeta):
                 self.args.time_format = option
         if not self.args.disable_module:
             self.args.disable_module = [self.cfg_parser.get("DEFAULT", "disabled_modules", fallback="")]
+        if not self.args.filename_restriction_scheme:
+            self.args.filename_restriction_scheme = self.cfg_parser.get(
+                "DEFAULT", "filename_restriction_scheme", fallback=None
+            )
         # Update config on disk
         with open(self.config_location, "w") as file:
             self.cfg_parser.write(file)

From 8c57dc228370d101722ec9d9e23786305c7e8638 Mon Sep 17 00:00:00 2001
From: Serene-Arc
Date: Wed, 4 Jan 2023 19:07:31 +1000
Subject: [PATCH 72/76] Add missing pytest flags to test

---
 tests/integration_tests/test_download_integration.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/integration_tests/test_download_integration.py b/tests/integration_tests/test_download_integration.py
index 138ea61..545bd31 100644
--- a/tests/integration_tests/test_download_integration.py
+++ b/tests/integration_tests/test_download_integration.py
@@ -400,6 +400,8 @@ def test_cli_download_score_filter(test_args: list[str], was_filtered: bool, tmp
     assert ("filtered due to score" in result.output) == was_filtered
 
 
+@pytest.mark.online
+@pytest.mark.reddit
 @pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests")
 @pytest.mark.parametrize(
     ("test_args", "response"),

From b64f5080257711465e2c752cda8c56d3bacc8e51 Mon Sep 17 00:00:00 2001
From: Serene-Arc
Date: Wed, 4 Jan 2023 19:37:02 +1000
Subject: [PATCH 73/76] Conform path length to filename scheme restriction

---
 bdfr/file_name_formatter.py       | 16 +++++++++++-----
 tests/test_file_name_formatter.py | 24 +++++++++++++++++-------
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/bdfr/file_name_formatter.py b/bdfr/file_name_formatter.py
index 0330336..dd04fad 100644
--- a/bdfr/file_name_formatter.py
+++ b/bdfr/file_name_formatter.py
@@ -27,6 +27,8 @@ class FileNameFormatter:
         "title",
         "upvotes",
     )
+    WINDOWS_MAX_PATH_LENGTH = 260
+    LINUX_MAX_PATH_LENGTH = 4096
 
     def __init__(
         self,
@@ -41,6 +43,10 @@ class FileNameFormatter:
         self.directory_format_string: list[str] = directory_format_string.split("/")
         self.time_format_string = time_format_string
         self.restriction_scheme = restriction_scheme.lower().strip() if restriction_scheme else None
+        if self.restriction_scheme == "windows":
+            self.max_path = self.WINDOWS_MAX_PATH_LENGTH
+        else:
+            self.max_path = self.find_max_path_length()
 
     def _format_name(self, submission: Union[Comment, Submission], format_string: str) -> str:
         if isinstance(submission, Submission):
@@ -63,6 +69,7 @@ class FileNameFormatter:
             if platform.system() == "Windows":
                 result = FileNameFormatter._format_for_windows(result)
         elif self.restriction_scheme == "windows":
+            logger.debug("Forcing Windows-compatible filenames")
             result = FileNameFormatter._format_for_windows(result)
         return result
@@ -135,14 +142,13 @@
             raise BulkDownloaderException(f"Could not determine path name: {subfolder}, {index}, {resource.extension}")
         return file_path
 
-    @staticmethod
-    def limit_file_name_length(filename: str, ending: str, root: Path) -> Path:
+    def limit_file_name_length(self, filename: str, ending: str, root: Path) -> Path:
         root = root.resolve().expanduser()
         possible_id = re.search(r"((?:_\w{6})?$)", filename)
         if possible_id:
             ending = possible_id.group(1) + ending
             filename = filename[: possible_id.start()]
-        max_path = FileNameFormatter.find_max_path_length()
+        max_path = self.max_path
         max_file_part_length_chars = 255 - len(ending)
         max_file_part_length_bytes = 255 - len(ending.encode("utf-8"))
         max_path_length = max_path - len(ending) - len(str(root)) - 1
@@ -166,9 +172,9 @@
             return int(subprocess.check_output(["getconf", "PATH_MAX", "/"]))
         except (ValueError, subprocess.CalledProcessError, OSError):
             if platform.system() == "Windows":
-                return 260
+                return FileNameFormatter.WINDOWS_MAX_PATH_LENGTH
             else:
-                return 4096
+                return FileNameFormatter.LINUX_MAX_PATH_LENGTH
 
     def format_resource_paths(
         self,
diff --git a/tests/test_file_name_formatter.py b/tests/test_file_name_formatter.py
index 5f94e5f..fb34a53 100644
--- a/tests/test_file_name_formatter.py
+++ b/tests/test_file_name_formatter.py
@@ -33,6 +33,12 @@ def submission() -> MagicMock:
     return test
 
 
+@pytest.fixture()
+def test_formatter() -> FileNameFormatter:
+    out = FileNameFormatter("{TITLE}", "", "ISO")
+    return out
+
+
 def check_valid_windows_path(test_string: str):
     return test_string == FileNameFormatter._format_for_windows(test_string)
 
@@ -231,8 +237,8 @@ def test_format_multiple_resources():
         ("😍💕✨" * 100, "_1.png"),
     ),
 )
-def test_limit_filename_length(test_filename: str, test_ending: str):
-    result = FileNameFormatter.limit_file_name_length(test_filename, test_ending, Path("."))
+def test_limit_filename_length(test_filename: str, test_ending: str, test_formatter: FileNameFormatter):
+    result = test_formatter.limit_file_name_length(test_filename, test_ending, Path("."))
     assert len(result.name) <= 255
     assert len(result.name.encode("utf-8")) <= 255
     assert len(str(result)) <= FileNameFormatter.find_max_path_length()
@@ -253,8 +259,10 @@ def test_limit_filename_length(test_filename: str, test_ending: str):
         ("😍💕✨" * 100 + "_aaa1aa", "_1.png", "_aaa1aa_1.png"),
     ),
 )
-def test_preserve_id_append_when_shortening(test_filename: str, test_ending: str, expected_end: str):
-    result = FileNameFormatter.limit_file_name_length(test_filename, test_ending, Path("."))
+def test_preserve_id_append_when_shortening(
+    test_filename: str, test_ending: str, expected_end: str, test_formatter: FileNameFormatter
+):
+    result = test_formatter.limit_file_name_length(test_filename, test_ending, Path("."))
     assert len(result.name) <= 255
     assert len(result.name.encode("utf-8")) <= 255
     assert result.name.endswith(expected_end)
@@ -284,8 +292,8 @@ def test_shorten_filename_real(submission: MagicMock, tmp_path: Path):
         ("a" * 500, "_bbbbbb.jpg"),
     ),
 )
-def test_shorten_path(test_name: str, test_ending: str, tmp_path: Path):
-    result = FileNameFormatter.limit_file_name_length(test_name, test_ending, tmp_path)
+def test_shorten_path(test_name: str, test_ending: str, tmp_path: Path, test_formatter: FileNameFormatter):
+    result = test_formatter.limit_file_name_length(test_name, test_ending, tmp_path)
     assert len(str(result.name)) <= 255
     assert len(str(result.name).encode("UTF-8")) <= 255
     assert len(str(result.name).encode("cp1252")) <= 255
@@ -482,7 +490,9 @@ def test_get_max_path_length():
 def test_windows_max_path(tmp_path: Path):
     with unittest.mock.patch("platform.system", return_value="Windows"):
         with unittest.mock.patch("bdfr.file_name_formatter.FileNameFormatter.find_max_path_length", return_value=260):
-            result = FileNameFormatter.limit_file_name_length("test" * 100, "_1.png", tmp_path)
+            mock = MagicMock()
+            mock.max_path = 260
+            result = FileNameFormatter.limit_file_name_length(mock, "test" * 100, "_1.png", tmp_path)
             assert len(str(result)) <= 260
             assert len(result.name) <= (260 - len(str(tmp_path)))

From 77a01e1627e8c47a6cf27f76894c08f661894d14 Mon Sep 17 00:00:00 2001
From: Serene-Arc
Date: Wed, 4 Jan 2023 19:49:09 +1000
Subject: [PATCH 74/76] Add tests for new option

---
 bdfr/connector.py                |  5 ++++-
 .../test_download_integration.py | 19 +++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/bdfr/connector.py b/bdfr/connector.py
index 8fe149a..860750d 100644
--- a/bdfr/connector.py
+++ b/bdfr/connector.py
@@ -111,6 +111,7 @@ class RedditConnector(metaclass=ABCMeta):
             self.args.filename_restriction_scheme = self.cfg_parser.get(
                 "DEFAULT", "filename_restriction_scheme", fallback=None
             )
+            logger.debug(f"Setting filename restriction scheme to '{self.args.filename_restriction_scheme}'")
         # Update config on disk
         with open(self.config_location, "w") as file:
             self.cfg_parser.write(file)
@@ -399,7 +400,9 @@ class RedditConnector(metaclass=ABCMeta):
             raise errors.BulkDownloaderException(f"User {name} is banned")
 
     def create_file_name_formatter(self) -> FileNameFormatter:
-        return FileNameFormatter(self.args.file_scheme, self.args.folder_scheme, self.args.time_format)
+        return FileNameFormatter(
+            self.args.file_scheme, self.args.folder_scheme, self.args.time_format, self.args.filename_restriction_scheme
+        )
 
     def create_time_filter(self) -> RedditTypes.TimeType:
         try:
diff --git a/tests/integration_tests/test_download_integration.py b/tests/integration_tests/test_download_integration.py
index 545bd31..35ca0c0 100644
--- a/tests/integration_tests/test_download_integration.py
+++ b/tests/integration_tests/test_download_integration.py
@@ -421,3 +421,22 @@ def test_user_serv_fail(test_args: list[str], response: int, tmp_path: Path):
     result = runner.invoke(cli, test_args)
     assert result.exit_code == 0
     assert f"received {response} HTTP response" in result.output
+
+
+@pytest.mark.online
+@pytest.mark.reddit
+@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests")
+@pytest.mark.parametrize(
+    "test_args",
+    (
+        ["-l", "102vd5i", "--filename-restriction-scheme", "windows"],
+        ["-l", "m3hxzd", "--filename-restriction-scheme", "windows"],
+    ),
+)
+def test_cli_download_explicit_filename_restriction_scheme(test_args: list[str], tmp_path: Path):
+    runner = CliRunner()
+    test_args = create_basic_args_for_download_runner(test_args, tmp_path)
+    result = runner.invoke(cli, test_args)
+    assert result.exit_code == 0
+    assert "Downloaded submission" in result.output
+    assert "Forcing Windows-compatible filenames" in result.output

From 12311029e4d8f4354c14fbc2fd368e8fe3c34505 Mon Sep 17 00:00:00 2001
From: Serene-Arc
Date: Wed, 4 Jan 2023 19:49:50 +1000
Subject: [PATCH 75/76] Conform test name

---
 tests/integration_tests/test_download_integration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration_tests/test_download_integration.py b/tests/integration_tests/test_download_integration.py
index 35ca0c0..4dae353 100644
--- a/tests/integration_tests/test_download_integration.py
+++ b/tests/integration_tests/test_download_integration.py
@@ -410,7 +410,7 @@ def test_cli_download_score_filter(test_args: list[str], was_filtered: bool, tmp
         (["--user", "nasa", "--submitted"], 504),
     ),
 )
-def test_user_serv_fail(test_args: list[str], response: int, tmp_path: Path):
+def test_cli_download_user_reddit_server_error(test_args: list[str], response: int, tmp_path: Path):
     runner = CliRunner()
     test_args = create_basic_args_for_download_runner(test_args, tmp_path)
     with patch("bdfr.connector.sleep", return_value=None):

From 241021fa391a2e287d1d04ab86e7b4aa5b548741 Mon Sep 17 00:00:00 2001
From: Serene-Arc
Date: Thu, 5 Jan 2023 17:38:37 +1000
Subject: [PATCH 76/76] Add new option details

---
 README.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/README.md b/README.md
index a174d80..b0bce0a 100644
--- a/README.md
+++ b/README.md
@@ -143,6 +143,10 @@ The following options are common between both the `archive` and `download` comma
   - Can be specified multiple times
   - Disables certain modules from being used
   - See [Disabling Modules](#disabling-modules) for more information and a list of module names
+- `--filename-restriction-scheme`
+  - Can be: `windows`, `linux`
+  - Turns off the OS detection and specifies which system to use when making filenames
+  - See [Filesystem Restrictions](#filesystem-restrictions)
The way to fix this is to use the `--log` option to manually specify where the logfile is to be stored. If the given location is unique to each instance of the BDFR, then it will run fine. +## Filesystem Restrictions + +Different filesystems have different restrictions for what files and directories can be named. Thesse are separated into two broad categories: Linux-based filesystems, which have very few restrictions; and Windows-based filesystems, which are much more restrictive in terms if forbidden characters and length of paths. + +During the normal course of operation, the BDFR detects what filesystem it is running on and formats any filenames and directories to conform to the rules that are expected of it. However, there are cases where this will fail. When running on a Linux-based machine, or another system where the home filesystem is permissive, and accessing a share or drive with a less permissive system, the BDFR will assume that the *home* filesystem's rules apply. For example, when downloading to a SAMBA share from Ubuntu, there will be errors as SAMBA is more restrictive than Ubuntu. + +The best option would be to always download to a filesystem that is as permission as possible, such as an NFS share or ext4 drive. However, when this is not possible, the BDFR allows for the restriction scheme to be manually specified at either the command-line or in the configuration file. At the command-line, this is done with `--filename-restriction-scheme windows`, or else an option by the same name in the configuration file. + ## Manipulating Logfiles The logfiles that the BDFR outputs are consistent and quite detailed and in a format that is amenable to regex. To this end, a number of bash scripts have been [included here](./scripts). They show examples for how to extract successfully downloaded IDs, failed IDs, and more besides.