From e493ab048aeb02e45d83520e137e6dece6c78b11 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Fri, 5 Nov 2021 12:47:46 +1000 Subject: [PATCH 01/21] Fix bug with period not separating file extension --- bdfr/file_name_formatter.py | 5 ++++- tests/test_file_name_formatter.py | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/bdfr/file_name_formatter.py b/bdfr/file_name_formatter.py index 2fbf95f..e81fe7f 100644 --- a/bdfr/file_name_formatter.py +++ b/bdfr/file_name_formatter.py @@ -110,8 +110,11 @@ class FileNameFormatter: index = f'_{str(index)}' if index else '' if not resource.extension: raise BulkDownloaderException(f'Resource from {resource.url} has no extension') - ending = index + resource.extension file_name = str(self._format_name(resource.source_submission, self.file_format_string)) + if not re.match(r'.*\.$',file_name) and not re.match(r'^\..*',resource.extension): + ending = index + '.' + resource.extension + else: + ending = index + resource.extension try: file_path = self._limit_file_name_length(file_name, ending, subfolder) diff --git a/tests/test_file_name_formatter.py b/tests/test_file_name_formatter.py index f596d89..f9bb2ad 100644 --- a/tests/test_file_name_formatter.py +++ b/tests/test_file_name_formatter.py @@ -13,6 +13,9 @@ import pytest from bdfr.file_name_formatter import FileNameFormatter from bdfr.resource import Resource +from bdfr.site_downloaders.base_downloader import BaseDownloader +from bdfr.site_downloaders.fallback_downloaders.youtubedl_fallback import YoutubeDlFallback +from bdfr.site_downloaders.youtube import Youtube @pytest.fixture() @@ -380,3 +383,23 @@ def test_windows_max_path(tmp_path: Path): result = FileNameFormatter._limit_file_name_length('test' * 100, '_1.png', tmp_path) assert len(str(result)) <= 260 assert len(result.name) <= (260 - len(str(tmp_path))) + + +@pytest.mark.online +@pytest.mark.reddit +@pytest.mark.parametrize(('test_reddit_id', 'test_downloader', 'expected_names'), ( + ('gphmnr', YoutubeDlFallback, {'He has a lot to say today.mp4'}), + ('d0oir2', YoutubeDlFallback, {"Crunk's finest moment. Welcome to the new subreddit!.mp4"}), +)) +def test_name_submission( + test_reddit_id: str, + test_downloader: type(BaseDownloader), + expected_names: set[str], + reddit_instance: praw.reddit.Reddit, +): + test_submission = reddit_instance.submission(id=test_reddit_id) + test_resources = test_downloader(test_submission).find_resources() + test_formatter = FileNameFormatter('{TITLE}', '', '') + results = test_formatter.format_resource_paths(test_resources, Path('.')) + results = set([r[0].name for r in results]) + assert expected_names == results From 801784c46d7764832f98e988faa804e199003a23 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Fri, 5 Nov 2021 13:23:55 +1000 Subject: [PATCH 02/21] Fix a crash when downloading a disabled pornhub video --- bdfr/site_downloaders/pornhub.py | 8 +++++++- tests/site_downloaders/test_pornhub.py | 14 ++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/bdfr/site_downloaders/pornhub.py b/bdfr/site_downloaders/pornhub.py index c2bc0ad..748454e 100644 --- a/bdfr/site_downloaders/pornhub.py +++ b/bdfr/site_downloaders/pornhub.py @@ -6,6 +6,7 @@ from typing import Optional from praw.models import Submission +from bdfr.exceptions import SiteDownloaderError from bdfr.resource import Resource from bdfr.site_authenticator import SiteAuthenticator from bdfr.site_downloaders.youtube import Youtube @@ -22,10 +23,15 @@ class PornHub(Youtube): 'format': 'best', 'nooverwrites': True, } + if video_attributes := super().get_video_attributes(self.post.url): + extension = video_attributes['ext'] + else: + raise SiteDownloaderError() + out = Resource( self.post, self.post.url, super()._download_video(ytdl_options), - super().get_video_attributes(self.post.url)['ext'], + extension, ) return [out] diff --git a/tests/site_downloaders/test_pornhub.py b/tests/site_downloaders/test_pornhub.py index 5c220cc..cbe3662 100644 --- a/tests/site_downloaders/test_pornhub.py +++ b/tests/site_downloaders/test_pornhub.py @@ -5,6 +5,7 @@ from unittest.mock import MagicMock import pytest +from bdfr.exceptions import SiteDownloaderError from bdfr.resource import Resource from bdfr.site_downloaders.pornhub import PornHub @@ -13,6 +14,7 @@ from bdfr.site_downloaders.pornhub import PornHub @pytest.mark.slow @pytest.mark.parametrize(('test_url', 'expected_hash'), ( ('https://www.pornhub.com/view_video.php?viewkey=ph6074c59798497', 'd9b99e4ebecf2d8d67efe5e70d2acf8a'), + ('https://www.pornhub.com/view_video.php?viewkey=ph5ede121f0d3f8', ''), )) def test_find_resources_good(test_url: str, expected_hash: str): test_submission = MagicMock() @@ -23,3 +25,15 @@ def test_find_resources_good(test_url: str, expected_hash: str): assert isinstance(resources[0], Resource) resources[0].download() assert resources[0].hash.hexdigest() == expected_hash + + +@pytest.mark.online +@pytest.mark.parametrize('test_url', ( + 'https://www.pornhub.com/view_video.php?viewkey=ph5ede121f0d3f8', +)) +def test_find_resources_good(test_url: str): + test_submission = MagicMock() + test_submission.url = test_url + downloader = PornHub(test_submission) + with pytest.raises(SiteDownloaderError): + downloader.find_resources() From 4be0f5ec190df4d3bc9d9672a45e01ff49600a41 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Mon, 15 Nov 2021 11:57:54 +1000 Subject: [PATCH 03/21] Add more tests for file length checking --- bdfr/file_name_formatter.py | 12 ++++++------ tests/test_file_name_formatter.py | 25 ++++++++++++++++++++----- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/bdfr/file_name_formatter.py b/bdfr/file_name_formatter.py index e81fe7f..542a722 100644 --- a/bdfr/file_name_formatter.py +++ b/bdfr/file_name_formatter.py @@ -111,19 +111,19 @@ class FileNameFormatter: if not resource.extension: raise BulkDownloaderException(f'Resource from {resource.url} has no extension') file_name = str(self._format_name(resource.source_submission, self.file_format_string)) - if not re.match(r'.*\.$',file_name) and not re.match(r'^\..*',resource.extension): + if not re.match(r'.*\.$', file_name) and not re.match(r'^\..*', resource.extension): ending = index + '.' + resource.extension else: ending = index + resource.extension try: - file_path = self._limit_file_name_length(file_name, ending, subfolder) + file_path = self.limit_file_name_length(file_name, ending, subfolder) except TypeError: raise BulkDownloaderException(f'Could not determine path name: {subfolder}, {index}, {resource.extension}') return file_path @staticmethod - def _limit_file_name_length(filename: str, ending: str, root: Path) -> Path: + def limit_file_name_length(filename: str, ending: str, root: Path) -> Path: root = root.resolve().expanduser() possible_id = re.search(r'((?:_\w{6})?$)', filename) if possible_id: @@ -133,9 +133,9 @@ class FileNameFormatter: max_length_chars = 255 - len(ending) max_length_bytes = 255 - len(ending.encode('utf-8')) max_path_length = max_path - len(ending) - len(str(root)) - 1 - while len(filename) > max_length_chars or \ - len(filename.encode('utf-8')) > max_length_bytes or \ - len(filename) > max_path_length: + while any([len(filename) > max_length_chars, + len(filename.encode('utf-8')) > max_length_bytes, + len(filename) > max_path_length]): filename = filename[:-1] return Path(root, filename + ending) diff --git a/tests/test_file_name_formatter.py b/tests/test_file_name_formatter.py index f9bb2ad..97fd851 100644 --- a/tests/test_file_name_formatter.py +++ b/tests/test_file_name_formatter.py @@ -188,7 +188,7 @@ def test_format_multiple_resources(): ('πŸ˜πŸ’•βœ¨' * 100, '_1.png'), )) def test_limit_filename_length(test_filename: str, test_ending: str): - result = FileNameFormatter._limit_file_name_length(test_filename, test_ending, Path('.')) + result = FileNameFormatter.limit_file_name_length(test_filename, test_ending, Path('.')) assert len(result.name) <= 255 assert len(result.name.encode('utf-8')) <= 255 assert len(str(result)) <= FileNameFormatter.find_max_path_length() @@ -207,15 +207,15 @@ def test_limit_filename_length(test_filename: str, test_ending: str): ('πŸ˜πŸ’•βœ¨' * 100 + '_aaa1aa', '_1.png', '_aaa1aa_1.png'), )) def test_preserve_id_append_when_shortening(test_filename: str, test_ending: str, expected_end: str): - result = FileNameFormatter._limit_file_name_length(test_filename, test_ending, Path('.')) + result = FileNameFormatter.limit_file_name_length(test_filename, test_ending, Path('.')) assert len(result.name) <= 255 assert len(result.name.encode('utf-8')) <= 255 assert result.name.endswith(expected_end) assert len(str(result)) <= FileNameFormatter.find_max_path_length() -def test_shorten_filenames(submission: MagicMock, tmp_path: Path): - submission.title = 'A' * 300 +def test_shorten_filename_real(submission: MagicMock, tmp_path: Path): + submission.title = 'A' * 500 submission.author.name = 'test' submission.subreddit.display_name = 'test' submission.id = 'BBBBBB' @@ -226,6 +226,21 @@ def test_shorten_filenames(submission: MagicMock, tmp_path: Path): result.touch() +@pytest.mark.parametrize(('test_name', 'test_ending'), ( + ('a', 'b'), + ('a', '_bbbbbb.jpg'), + ('a' * 20, '_bbbbbb.jpg'), + ('a' * 50, '_bbbbbb.jpg'), + ('a' * 500, '_bbbbbb.jpg'), +)) +def test_shorten_path(test_name: str, test_ending: str, tmp_path: Path): + result = FileNameFormatter.limit_file_name_length(test_name, test_ending, tmp_path) + assert len(str(result.name)) <= 255 + assert len(str(result.name).encode('UTF-8')) <= 255 + assert len(str(result.name).encode('cp1252')) <= 255 + assert len(str(result)) <= FileNameFormatter.find_max_path_length() + + @pytest.mark.parametrize(('test_string', 'expected'), ( ('test', 'test'), ('test😍', 'test'), @@ -380,7 +395,7 @@ def test_get_max_path_length(): def test_windows_max_path(tmp_path: Path): with unittest.mock.patch('platform.system', return_value='Windows'): with unittest.mock.patch('bdfr.file_name_formatter.FileNameFormatter.find_max_path_length', return_value=260): - result = FileNameFormatter._limit_file_name_length('test' * 100, '_1.png', tmp_path) + result = FileNameFormatter.limit_file_name_length('test' * 100, '_1.png', tmp_path) assert len(str(result)) <= 260 assert len(result.name) <= (260 - len(str(tmp_path))) From f05e909008a23ee401aed28e8fff7ecaf939c031 Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Tue, 9 Nov 2021 19:30:27 -0500 Subject: [PATCH 04/21] Stop videos from being downloaded as images Erroneous .gifv extensions such as .giff or .gift resolve to a static image and are downloaded by the direct downloader. (ex: https://i.imgur.com/OGeVuAe.giff ) --- bdfr/site_downloaders/download_factory.py | 2 +- bdfr/site_downloaders/imgur.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bdfr/site_downloaders/download_factory.py b/bdfr/site_downloaders/download_factory.py index a4e9a6a..ccff8e7 100644 --- a/bdfr/site_downloaders/download_factory.py +++ b/bdfr/site_downloaders/download_factory.py @@ -24,7 +24,7 @@ class DownloadFactory: @staticmethod def pull_lever(url: str) -> Type[BaseDownloader]: sanitised_url = DownloadFactory.sanitise_url(url) - if re.match(r'(i\.)?imgur.*\.gifv$', sanitised_url): + if re.match(r'(i\.)?imgur.*\.gif.*$', sanitised_url): return Imgur elif re.match(r'.*/.*\.\w{3,4}(\?[\w;&=]*)?$', sanitised_url) and \ not DownloadFactory.is_web_resource(sanitised_url): diff --git a/bdfr/site_downloaders/imgur.py b/bdfr/site_downloaders/imgur.py index a3e3135..905581e 100644 --- a/bdfr/site_downloaders/imgur.py +++ b/bdfr/site_downloaders/imgur.py @@ -42,9 +42,9 @@ class Imgur(BaseDownloader): @staticmethod def _get_data(link: str) -> dict: link = link.rstrip('?') - if re.match(r'(?i).*\.gifv$', link): + if re.match(r'(?i).*\.gif.*$', link): link = link.replace('i.imgur', 'imgur') - link = re.sub('(?i)\\.gifv$', '', link) + link = re.sub('(?i)\\.gif.*$', '', link) res = Imgur.retrieve_url(link, cookies={'over18': '1', 'postpagebeta': '0'}) From bd802df38c13c3ebec0ff6f3a72ef3da26403c29 Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Wed, 10 Nov 2021 15:39:12 -0500 Subject: [PATCH 05/21] Update test_imgur.py Adding test for .giff/.gift imgur extension --- tests/site_downloaders/test_imgur.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/site_downloaders/test_imgur.py b/tests/site_downloaders/test_imgur.py index 4c754ec..85b84c6 100644 --- a/tests/site_downloaders/test_imgur.py +++ b/tests/site_downloaders/test_imgur.py @@ -150,6 +150,14 @@ def test_imgur_extension_validation_bad(test_extension: str): 'https://i.imgur.com/uTvtQsw.gifv', ('46c86533aa60fc0e09f2a758513e3ac2',), ), + ( + 'https://i.imgur.com/OGeVuAe.giff', + ('77389679084d381336f168538793f218',) + ) + ( + 'https://i.imgur.com/OGeVuAe.gift', + ('77389679084d381336f168538793f218',) + ) )) def test_find_resources(test_url: str, expected_hashes: list[str]): mock_download = Mock() From 8c3af7029eb37c19d4e32dd87edcc87f8565f791 Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Wed, 10 Nov 2021 20:33:58 -0500 Subject: [PATCH 06/21] Update test_imgur.py --- tests/site_downloaders/test_imgur.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/site_downloaders/test_imgur.py b/tests/site_downloaders/test_imgur.py index 85b84c6..359bdc3 100644 --- a/tests/site_downloaders/test_imgur.py +++ b/tests/site_downloaders/test_imgur.py @@ -153,11 +153,11 @@ def test_imgur_extension_validation_bad(test_extension: str): ( 'https://i.imgur.com/OGeVuAe.giff', ('77389679084d381336f168538793f218',) - ) + ), ( 'https://i.imgur.com/OGeVuAe.gift', ('77389679084d381336f168538793f218',) - ) + ), )) def test_find_resources(test_url: str, expected_hashes: list[str]): mock_download = Mock() From 53562f48737059d64440d3036d141aa33c55c0e0 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Mon, 15 Nov 2021 11:03:12 +1000 Subject: [PATCH 07/21] Fix regex --- bdfr/site_downloaders/download_factory.py | 2 +- bdfr/site_downloaders/imgur.py | 4 ++-- tests/site_downloaders/test_download_factory.py | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/bdfr/site_downloaders/download_factory.py b/bdfr/site_downloaders/download_factory.py index ccff8e7..2f4cf9d 100644 --- a/bdfr/site_downloaders/download_factory.py +++ b/bdfr/site_downloaders/download_factory.py @@ -24,7 +24,7 @@ class DownloadFactory: @staticmethod def pull_lever(url: str) -> Type[BaseDownloader]: sanitised_url = DownloadFactory.sanitise_url(url) - if re.match(r'(i\.)?imgur.*\.gif.*$', sanitised_url): + if re.match(r'(i\.)?imgur.*\.gif.+$', sanitised_url): return Imgur elif re.match(r'.*/.*\.\w{3,4}(\?[\w;&=]*)?$', sanitised_url) and \ not DownloadFactory.is_web_resource(sanitised_url): diff --git a/bdfr/site_downloaders/imgur.py b/bdfr/site_downloaders/imgur.py index 905581e..1f669d0 100644 --- a/bdfr/site_downloaders/imgur.py +++ b/bdfr/site_downloaders/imgur.py @@ -42,9 +42,9 @@ class Imgur(BaseDownloader): @staticmethod def _get_data(link: str) -> dict: link = link.rstrip('?') - if re.match(r'(?i).*\.gif.*$', link): + if re.match(r'(?i).*\.gif.+$', link): link = link.replace('i.imgur', 'imgur') - link = re.sub('(?i)\\.gif.*$', '', link) + link = re.sub('(?i)\\.gif.+$', '', link) res = Imgur.retrieve_url(link, cookies={'over18': '1', 'postpagebeta': '0'}) diff --git a/tests/site_downloaders/test_download_factory.py b/tests/site_downloaders/test_download_factory.py index 95b522d..15466cb 100644 --- a/tests/site_downloaders/test_download_factory.py +++ b/tests/site_downloaders/test_download_factory.py @@ -30,6 +30,7 @@ from bdfr.site_downloaders.youtube import Youtube ('https://imgur.com/BuzvZwb.gifv', Imgur), ('https://i.imgur.com/6fNdLst.gif', Direct), ('https://imgur.com/a/MkxAzeg', Imgur), + ('https://i.imgur.com/OGeVuAe.giff', Imgur), ('https://www.reddit.com/gallery/lu93m7', Gallery), ('https://gfycat.com/concretecheerfulfinwhale', Gfycat), ('https://www.erome.com/a/NWGw0F09', Erome), From 17939fe47ce0c7371f88316e574f5439b94b90d1 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 21 Nov 2021 11:48:29 +1000 Subject: [PATCH 08/21] Fix bug with youtube class and children --- .../fallback_downloaders/youtubedl_fallback.py | 8 +++++--- bdfr/site_downloaders/youtube.py | 11 ++++++----- .../fallback_downloaders/test_youtubedl_fallback.py | 11 +++++++++++ 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/bdfr/site_downloaders/fallback_downloaders/youtubedl_fallback.py b/bdfr/site_downloaders/fallback_downloaders/youtubedl_fallback.py index 6ede405..d8753bd 100644 --- a/bdfr/site_downloaders/fallback_downloaders/youtubedl_fallback.py +++ b/bdfr/site_downloaders/fallback_downloaders/youtubedl_fallback.py @@ -6,6 +6,7 @@ from typing import Optional from praw.models import Submission +from bdfr.exceptions import NotADownloadableLinkError from bdfr.resource import Resource from bdfr.site_authenticator import SiteAuthenticator from bdfr.site_downloaders.fallback_downloaders.fallback_downloader import BaseFallbackDownloader @@ -29,8 +30,9 @@ class YoutubeDlFallback(BaseFallbackDownloader, Youtube): @staticmethod def can_handle_link(url: str) -> bool: - attributes = YoutubeDlFallback.get_video_attributes(url) + try: + attributes = YoutubeDlFallback.get_video_attributes(url) + except NotADownloadableLinkError: + return False if attributes: return True - else: - return False diff --git a/bdfr/site_downloaders/youtube.py b/bdfr/site_downloaders/youtube.py index ba82007..f18f405 100644 --- a/bdfr/site_downloaders/youtube.py +++ b/bdfr/site_downloaders/youtube.py @@ -27,10 +27,7 @@ class Youtube(BaseDownloader): 'nooverwrites': True, } download_function = self._download_video(ytdl_options) - try: - extension = self.get_video_attributes(self.post.url)['ext'] - except KeyError: - raise NotADownloadableLinkError(f'Youtube-DL cannot download URL {self.post.url}') + extension = self.get_video_attributes(self.post.url)['ext'] res = Resource(self.post, self.post.url, download_function, extension) return [res] @@ -67,6 +64,10 @@ class Youtube(BaseDownloader): with yt_dlp.YoutubeDL({'logger': yt_logger, }) as ydl: try: result = ydl.extract_info(url, download=False) - return result except Exception as e: logger.exception(e) + raise NotADownloadableLinkError(f'Video info extraction failed for {url}') + if 'ext' in result: + return result + else: + raise NotADownloadableLinkError(f'Video info extraction failed for {url}') diff --git a/tests/site_downloaders/fallback_downloaders/test_youtubedl_fallback.py b/tests/site_downloaders/fallback_downloaders/test_youtubedl_fallback.py index f268c0a..0590687 100644 --- a/tests/site_downloaders/fallback_downloaders/test_youtubedl_fallback.py +++ b/tests/site_downloaders/fallback_downloaders/test_youtubedl_fallback.py @@ -4,6 +4,7 @@ from unittest.mock import MagicMock import pytest +from bdfr.exceptions import NotADownloadableLinkError from bdfr.resource import Resource from bdfr.site_downloaders.fallback_downloaders.youtubedl_fallback import YoutubeDlFallback @@ -13,12 +14,22 @@ from bdfr.site_downloaders.fallback_downloaders.youtubedl_fallback import Youtub ('https://www.reddit.com/r/specializedtools/comments/n2nw5m/bamboo_splitter/', True), ('https://www.youtube.com/watch?v=P19nvJOmqCc', True), ('https://www.example.com/test', False), + ('https://milesmatrix.bandcamp.com/album/la-boum/', False), )) def test_can_handle_link(test_url: str, expected: bool): result = YoutubeDlFallback.can_handle_link(test_url) assert result == expected +@pytest.mark.online +@pytest.mark.parametrize('test_url', ( + 'https://milesmatrix.bandcamp.com/album/la-boum/', +)) +def test_info_extraction_bad(test_url: str): + with pytest.raises(NotADownloadableLinkError): + YoutubeDlFallback.get_video_attributes(test_url) + + @pytest.mark.online @pytest.mark.slow @pytest.mark.parametrize(('test_url', 'expected_hash'), ( From b4dd89cddce3c6b880d0f25cb2d21e39a453d8d3 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 21 Nov 2021 12:10:26 +1000 Subject: [PATCH 09/21] Add section for common command-line tricks --- README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.md b/README.md index 89a4e90..9f2ef7d 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,8 @@ This is a tool to download submissions or submission data from Reddit. It can be If you wish to open an issue, please read [the guide on opening issues](docs/CONTRIBUTING.md#opening-an-issue) to ensure that your issue is clear and contains everything it needs to for the developers to investigate. +Included in this README are a few example Bash tricks to get certain behaviour. For that, see [Common Command Tricks](#common-command-tricks). + ## Installation *Bulk Downloader for Reddit* needs Python version 3.9 or above. Please update Python before installation to meet the requirement. Then, you can install it as such: ```bash @@ -208,6 +210,16 @@ The following options are for the `archive` command specifically. The `clone` command can take all the options listed above for both the `archive` and `download` commands since it performs the functions of both. +## Common Command Tricks + +A common use case is for subreddits/users to be loaded from a file. The BDFR doesn't support this directly but it is simple enough to do through the command-line. Consider a list of usernames to download; they can be passed through to the BDFR with the following command, assuming that the usernames are in a text file: + +```bash +cat users.txt | xargs -L 1 echo --user | xargs -L 50 python3 -m bdfr download +``` + +The part `-L 50` is to make sure that the character limit for a single line isn't exceeded, but may not be necessary. This can also be used to load subreddits from a file, simply exchange `--user` with `--subreddit` and so on. + ## Authentication and Security The BDFR uses OAuth2 authentication to connect to Reddit if authentication is required. This means that it is a secure, token-based system for making requests. This also means that the BDFR only has access to specific parts of the account authenticated, by default only saved posts, upvoted posts, and the identity of the authenticated account. Note that authentication is not required unless accessing private things like upvoted posts, saved posts, and private multireddits. From fc279705c1691c2a29b1abb43c98755f1663428a Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 21 Nov 2021 12:11:58 +1000 Subject: [PATCH 10/21] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9f2ef7d..f8f29e9 100644 --- a/README.md +++ b/README.md @@ -332,7 +332,7 @@ The BDFR can be run in multiple instances with multiple configurations, either c Running these scenarios consecutively is done easily, like any single run. Configuration files that differ may be specified with the `--config` option to switch between tokens, for example. Otherwise, almost all configuration for data sources can be specified per-run through the command line. -Running scenarious concurrently (at the same time) however, is more complicated. The BDFR will look to a single, static place to put the detailed log files, in a directory with the configuration file specified above. If there are multiple instances, or processes, of the BDFR running at the same time, they will all be trying to write to a single file. On Linux and other UNIX based operating systems, this will succeed, though there is a substantial risk that the logfile will be useless due to garbled and jumbled data. On Windows however, attempting this will raise an error that crashes the program as Windows forbids multiple processes from accessing the same file. +Running scenarios concurrently (at the same time) however, is more complicated. The BDFR will look to a single, static place to put the detailed log files, in a directory with the configuration file specified above. If there are multiple instances, or processes, of the BDFR running at the same time, they will all be trying to write to a single file. On Linux and other UNIX based operating systems, this will succeed, though there is a substantial risk that the logfile will be useless due to garbled and jumbled data. On Windows however, attempting this will raise an error that crashes the program as Windows forbids multiple processes from accessing the same file. The way to fix this is to use the `--log` option to manually specify where the logfile is to be stored. If the given location is unique to each instance of the BDFR, then it will run fine. From f19171a1b45bed66e4a241d4950dd2ffc39408e1 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 21 Nov 2021 12:25:05 +1000 Subject: [PATCH 11/21] Add mention of bash scripts --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index f8f29e9..7bc4f0b 100644 --- a/README.md +++ b/README.md @@ -336,6 +336,10 @@ Running scenarios concurrently (at the same time) however, is more complicated. The way to fix this is to use the `--log` option to manually specify where the logfile is to be stored. If the given location is unique to each instance of the BDFR, then it will run fine. +## Manipulating Logfiles + +The logfiles that the BDFR outputs are consistent and quite detailed and in a format that is amenable to regex. To this end, a number of bash scripts have been [included here](./scripts). They show examples for how to extract successfully downloaded IDs, failed IDs, and more besides. + ## List of currently supported sources - Direct links (links leading to a file) From 6dd17c876254844a524269d5c2c40e6dccd965af Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 21 Nov 2021 16:17:43 +1000 Subject: [PATCH 12/21] Remove unused import --- tests/test_file_name_formatter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_file_name_formatter.py b/tests/test_file_name_formatter.py index 97fd851..29ee50f 100644 --- a/tests/test_file_name_formatter.py +++ b/tests/test_file_name_formatter.py @@ -15,7 +15,6 @@ from bdfr.file_name_formatter import FileNameFormatter from bdfr.resource import Resource from bdfr.site_downloaders.base_downloader import BaseDownloader from bdfr.site_downloaders.fallback_downloaders.youtubedl_fallback import YoutubeDlFallback -from bdfr.site_downloaders.youtube import Youtube @pytest.fixture() From 2dd446a402a6cfe848d6fd42dd80b93256b6ee0c Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 21 Nov 2021 13:14:28 +1000 Subject: [PATCH 13/21] Fix max path length calculations --- bdfr/file_name_formatter.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/bdfr/file_name_formatter.py b/bdfr/file_name_formatter.py index 542a722..3e8832b 100644 --- a/bdfr/file_name_formatter.py +++ b/bdfr/file_name_formatter.py @@ -130,14 +130,19 @@ class FileNameFormatter: ending = possible_id.group(1) + ending filename = filename[:possible_id.start()] max_path = FileNameFormatter.find_max_path_length() - max_length_chars = 255 - len(ending) - max_length_bytes = 255 - len(ending.encode('utf-8')) + max_file_part_length_chars = 255 - len(ending) + max_file_part_length_bytes = 255 - len(ending.encode('utf-8')) max_path_length = max_path - len(ending) - len(str(root)) - 1 - while any([len(filename) > max_length_chars, - len(filename.encode('utf-8')) > max_length_bytes, - len(filename) > max_path_length]): + + out = Path(root, filename + ending) + while any([len(filename) > max_file_part_length_chars, + len(filename.encode('utf-8')) > max_file_part_length_bytes, + len(str(out)) > max_path_length, + ]): filename = filename[:-1] - return Path(root, filename + ending) + out = Path(root, filename + ending) + + return out @staticmethod def find_max_path_length() -> int: From 892564333176e17b919b21470571a789c50c3167 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Wed, 24 Nov 2021 10:40:18 +1000 Subject: [PATCH 14/21] Rename module to reflect backend change --- bdfr/site_downloaders/download_factory.py | 6 +++--- .../{youtubedl_fallback.py => ytdlp_fallback.py} | 6 +++--- ...st_youtubedl_fallback.py => test_ytdlp_fallback.py} | 8 ++++---- tests/site_downloaders/test_download_factory.py | 10 +++++----- tests/test_file_name_formatter.py | 6 +++--- 5 files changed, 18 insertions(+), 18 deletions(-) rename bdfr/site_downloaders/fallback_downloaders/{youtubedl_fallback.py => ytdlp_fallback.py} (84%) rename tests/site_downloaders/fallback_downloaders/{test_youtubedl_fallback.py => test_ytdlp_fallback.py} (86%) diff --git a/bdfr/site_downloaders/download_factory.py b/bdfr/site_downloaders/download_factory.py index 2f4cf9d..91489a0 100644 --- a/bdfr/site_downloaders/download_factory.py +++ b/bdfr/site_downloaders/download_factory.py @@ -9,7 +9,7 @@ from bdfr.exceptions import NotADownloadableLinkError from bdfr.site_downloaders.base_downloader import BaseDownloader from bdfr.site_downloaders.direct import Direct from bdfr.site_downloaders.erome import Erome -from bdfr.site_downloaders.fallback_downloaders.youtubedl_fallback import YoutubeDlFallback +from bdfr.site_downloaders.fallback_downloaders.ytdlp_fallback import YtdlpFallback from bdfr.site_downloaders.gallery import Gallery from bdfr.site_downloaders.gfycat import Gfycat from bdfr.site_downloaders.imgur import Imgur @@ -49,8 +49,8 @@ class DownloadFactory: return PornHub elif re.match(r'vidble\.com', sanitised_url): return Vidble - elif YoutubeDlFallback.can_handle_link(sanitised_url): - return YoutubeDlFallback + elif YtdlpFallback.can_handle_link(sanitised_url): + return YtdlpFallback else: raise NotADownloadableLinkError(f'No downloader module exists for url {url}') diff --git a/bdfr/site_downloaders/fallback_downloaders/youtubedl_fallback.py b/bdfr/site_downloaders/fallback_downloaders/ytdlp_fallback.py similarity index 84% rename from bdfr/site_downloaders/fallback_downloaders/youtubedl_fallback.py rename to bdfr/site_downloaders/fallback_downloaders/ytdlp_fallback.py index d8753bd..1225624 100644 --- a/bdfr/site_downloaders/fallback_downloaders/youtubedl_fallback.py +++ b/bdfr/site_downloaders/fallback_downloaders/ytdlp_fallback.py @@ -15,9 +15,9 @@ from bdfr.site_downloaders.youtube import Youtube logger = logging.getLogger(__name__) -class YoutubeDlFallback(BaseFallbackDownloader, Youtube): +class YtdlpFallback(BaseFallbackDownloader, Youtube): def __init__(self, post: Submission): - super(YoutubeDlFallback, self).__init__(post) + super(YtdlpFallback, self).__init__(post) def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]: out = Resource( @@ -31,7 +31,7 @@ class YoutubeDlFallback(BaseFallbackDownloader, Youtube): @staticmethod def can_handle_link(url: str) -> bool: try: - attributes = YoutubeDlFallback.get_video_attributes(url) + attributes = YtdlpFallback.get_video_attributes(url) except NotADownloadableLinkError: return False if attributes: diff --git a/tests/site_downloaders/fallback_downloaders/test_youtubedl_fallback.py b/tests/site_downloaders/fallback_downloaders/test_ytdlp_fallback.py similarity index 86% rename from tests/site_downloaders/fallback_downloaders/test_youtubedl_fallback.py rename to tests/site_downloaders/fallback_downloaders/test_ytdlp_fallback.py index 0590687..2c4a4f6 100644 --- a/tests/site_downloaders/fallback_downloaders/test_youtubedl_fallback.py +++ b/tests/site_downloaders/fallback_downloaders/test_ytdlp_fallback.py @@ -6,7 +6,7 @@ import pytest from bdfr.exceptions import NotADownloadableLinkError from bdfr.resource import Resource -from bdfr.site_downloaders.fallback_downloaders.youtubedl_fallback import YoutubeDlFallback +from bdfr.site_downloaders.fallback_downloaders.ytdlp_fallback import YtdlpFallback @pytest.mark.online @@ -17,7 +17,7 @@ from bdfr.site_downloaders.fallback_downloaders.youtubedl_fallback import Youtub ('https://milesmatrix.bandcamp.com/album/la-boum/', False), )) def test_can_handle_link(test_url: str, expected: bool): - result = YoutubeDlFallback.can_handle_link(test_url) + result = YtdlpFallback.can_handle_link(test_url) assert result == expected @@ -27,7 +27,7 @@ def test_can_handle_link(test_url: str, expected: bool): )) def test_info_extraction_bad(test_url: str): with pytest.raises(NotADownloadableLinkError): - YoutubeDlFallback.get_video_attributes(test_url) + YtdlpFallback.get_video_attributes(test_url) @pytest.mark.online @@ -41,7 +41,7 @@ def test_info_extraction_bad(test_url: str): def test_find_resources(test_url: str, expected_hash: str): test_submission = MagicMock() test_submission.url = test_url - downloader = YoutubeDlFallback(test_submission) + downloader = YtdlpFallback(test_submission) resources = downloader.find_resources() assert len(resources) == 1 assert isinstance(resources[0], Resource) diff --git a/tests/site_downloaders/test_download_factory.py b/tests/site_downloaders/test_download_factory.py index 15466cb..441b554 100644 --- a/tests/site_downloaders/test_download_factory.py +++ b/tests/site_downloaders/test_download_factory.py @@ -9,7 +9,7 @@ from bdfr.site_downloaders.base_downloader import BaseDownloader from bdfr.site_downloaders.direct import Direct from bdfr.site_downloaders.download_factory import DownloadFactory from bdfr.site_downloaders.erome import Erome -from bdfr.site_downloaders.fallback_downloaders.youtubedl_fallback import YoutubeDlFallback +from bdfr.site_downloaders.fallback_downloaders.ytdlp_fallback import YtdlpFallback from bdfr.site_downloaders.gallery import Gallery from bdfr.site_downloaders.gfycat import Gfycat from bdfr.site_downloaders.imgur import Imgur @@ -42,10 +42,10 @@ from bdfr.site_downloaders.youtube import Youtube ('https://i.imgur.com/3SKrQfK.jpg?1', Direct), ('https://dynasty-scans.com/system/images_images/000/017/819/original/80215103_p0.png?1612232781', Direct), ('https://m.imgur.com/a/py3RW0j', Imgur), - ('https://v.redd.it/9z1dnk3xr5k61', YoutubeDlFallback), - ('https://streamable.com/dt46y', YoutubeDlFallback), - ('https://vimeo.com/channels/31259/53576664', YoutubeDlFallback), - ('http://video.pbs.org/viralplayer/2365173446/', YoutubeDlFallback), + ('https://v.redd.it/9z1dnk3xr5k61', YtdlpFallback), + ('https://streamable.com/dt46y', YtdlpFallback), + ('https://vimeo.com/channels/31259/53576664', YtdlpFallback), + ('http://video.pbs.org/viralplayer/2365173446/', YtdlpFallback), ('https://www.pornhub.com/view_video.php?viewkey=ph5a2ee0461a8d0', PornHub), )) def test_factory_lever_good(test_submission_url: str, expected_class: BaseDownloader, reddit_instance: praw.Reddit): diff --git a/tests/test_file_name_formatter.py b/tests/test_file_name_formatter.py index 29ee50f..e60ae8d 100644 --- a/tests/test_file_name_formatter.py +++ b/tests/test_file_name_formatter.py @@ -14,7 +14,7 @@ import pytest from bdfr.file_name_formatter import FileNameFormatter from bdfr.resource import Resource from bdfr.site_downloaders.base_downloader import BaseDownloader -from bdfr.site_downloaders.fallback_downloaders.youtubedl_fallback import YoutubeDlFallback +from bdfr.site_downloaders.fallback_downloaders.ytdlp_fallback import YtdlpFallback @pytest.fixture() @@ -402,8 +402,8 @@ def test_windows_max_path(tmp_path: Path): @pytest.mark.online @pytest.mark.reddit @pytest.mark.parametrize(('test_reddit_id', 'test_downloader', 'expected_names'), ( - ('gphmnr', YoutubeDlFallback, {'He has a lot to say today.mp4'}), - ('d0oir2', YoutubeDlFallback, {"Crunk's finest moment. Welcome to the new subreddit!.mp4"}), + ('gphmnr', YtdlpFallback, {'He has a lot to say today.mp4'}), + ('d0oir2', YtdlpFallback, {"Crunk's finest moment. Welcome to the new subreddit!.mp4"}), )) def test_name_submission( test_reddit_id: str, From 4a864827567bd59c2c473f6d2a08d4bbd46da1a3 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Wed, 24 Nov 2021 11:07:52 +1000 Subject: [PATCH 15/21] Add skip statement for broken test on windows --- tests/test_file_name_formatter.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_file_name_formatter.py b/tests/test_file_name_formatter.py index e60ae8d..30fac77 100644 --- a/tests/test_file_name_formatter.py +++ b/tests/test_file_name_formatter.py @@ -2,6 +2,7 @@ # coding=utf-8 import platform +import sys import unittest.mock from datetime import datetime from pathlib import Path @@ -213,6 +214,7 @@ def test_preserve_id_append_when_shortening(test_filename: str, test_ending: str assert len(str(result)) <= FileNameFormatter.find_max_path_length() +@pytest.mark.skipif(sys.platform == 'win32', reason='Test broken on windows github') def test_shorten_filename_real(submission: MagicMock, tmp_path: Path): submission.title = 'A' * 500 submission.author.name = 'test' From dd8d74ee25a34cdfe36c271e6a26cc5f6e9ed7d8 Mon Sep 17 00:00:00 2001 From: "Jay R. Wren" Date: Sat, 30 Oct 2021 22:19:46 -0400 Subject: [PATCH 16/21] Add --ignore to ignore user --- README.md | 3 +++ bdfr/__main__.py | 1 + bdfr/archiver.py | 2 ++ bdfr/cloner.py | 2 ++ bdfr/configuration.py | 1 + 5 files changed, 9 insertions(+) diff --git a/README.md b/README.md index 7bc4f0b..3ffef7f 100644 --- a/README.md +++ b/README.md @@ -78,6 +78,9 @@ The following options are common between both the `archive` and `download` comma - Can be specified multiple times - Disables certain modules from being used - See [Disabling Modules](#disabling-modules) for more information and a list of module names +- `--ignore` + - This will add a user to ignore + - Can be specified multiple times - `--include-id-file` - This will add any submission with the IDs in the files provided - Can be specified multiple times diff --git a/bdfr/__main__.py b/bdfr/__main__.py index 367f8c6..de658de 100644 --- a/bdfr/__main__.py +++ b/bdfr/__main__.py @@ -17,6 +17,7 @@ _common_options = [ click.option('--authenticate', is_flag=True, default=None), click.option('--config', type=str, default=None), click.option('--disable-module', multiple=True, default=None, type=str), + click.option('--ignore-user', type=str, multiple=True, default=None), click.option('--include-id-file', multiple=True, default=None), click.option('--log', type=str, default=None), click.option('--saved', is_flag=True, default=None), diff --git a/bdfr/archiver.py b/bdfr/archiver.py index d445e8d..e51be57 100644 --- a/bdfr/archiver.py +++ b/bdfr/archiver.py @@ -28,6 +28,8 @@ class Archiver(RedditConnector): def download(self): for generator in self.reddit_lists: for submission in generator: + if submission.author.name in self.args.ignore_user: + continue logger.debug(f'Attempting to archive submission {submission.id}') self.write_entry(submission) diff --git a/bdfr/cloner.py b/bdfr/cloner.py index 979f50f..c48ae17 100644 --- a/bdfr/cloner.py +++ b/bdfr/cloner.py @@ -17,5 +17,7 @@ class RedditCloner(RedditDownloader, Archiver): def download(self): for generator in self.reddit_lists: for submission in generator: + if submission.author.name in self.args.ignore_user: + continue self._download_submission(submission) self.write_entry(submission) diff --git a/bdfr/configuration.py b/bdfr/configuration.py index bc4c541..81fa3e4 100644 --- a/bdfr/configuration.py +++ b/bdfr/configuration.py @@ -18,6 +18,7 @@ class Configuration(Namespace): self.exclude_id_file = [] self.file_scheme: str = '{REDDITOR}_{TITLE}_{POSTID}' self.folder_scheme: str = '{SUBREDDIT}' + self.ignore_user = [] self.include_id_file = [] self.limit: Optional[int] = None self.link: list[str] = [] From 2b50ee072400226e15ee2526985214ce938a6670 Mon Sep 17 00:00:00 2001 From: "Jay R. Wren" Date: Mon, 1 Nov 2021 09:28:46 -0400 Subject: [PATCH 17/21] add test. fix typos. --- README.md | 2 +- bdfr/cloner.py | 2 -- bdfr/downloader.py | 4 ++++ tests/test_downloader.py | 17 +++++++++++++++++ 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 3ffef7f..b84aa3d 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ The following options are common between both the `archive` and `download` comma - Can be specified multiple times - Disables certain modules from being used - See [Disabling Modules](#disabling-modules) for more information and a list of module names -- `--ignore` +- `--ignore-user` - This will add a user to ignore - Can be specified multiple times - `--include-id-file` diff --git a/bdfr/cloner.py b/bdfr/cloner.py index c48ae17..979f50f 100644 --- a/bdfr/cloner.py +++ b/bdfr/cloner.py @@ -17,7 +17,5 @@ class RedditCloner(RedditDownloader, Archiver): def download(self): for generator in self.reddit_lists: for submission in generator: - if submission.author.name in self.args.ignore_user: - continue self._download_submission(submission) self.write_entry(submission) diff --git a/bdfr/downloader.py b/bdfr/downloader.py index 70052b2..edfd68e 100644 --- a/bdfr/downloader.py +++ b/bdfr/downloader.py @@ -51,6 +51,10 @@ class RedditDownloader(RedditConnector): elif submission.subreddit.display_name.lower() in self.args.skip_subreddit: logger.debug(f'Submission {submission.id} in {submission.subreddit.display_name} in skip list') return + elif submission.author.name in self.args.ignore_user: + logger.debug( + f'Submission {submission.id} in {submission.subreddit.display_name} by {submission.author.name} an ignored user') + return elif not isinstance(submission, praw.models.Submission): logger.warning(f'{submission.id} is not a submission') return diff --git a/tests/test_downloader.py b/tests/test_downloader.py index e5f0a31..0cc8dec 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -200,3 +200,20 @@ def test_download_submission( RedditDownloader._download_submission(downloader_mock, submission) folder_contents = list(tmp_path.iterdir()) assert len(folder_contents) == expected_files_len + + +@pytest.mark.parametrize('test_ignore_user', ( + 'alice', +)) +def test_download_ignores_user( + test_ignore_user: str, + mock_function: MagicMock, + downloader_mock: MagicMock, +): + downloader_mock.args.ignore_user = test_ignore_user + submission = downloader_mock.reddit_instance.submission(id='m1hqw6') + mock_function.return_value = MagicMock() + mock_function.return_value.__name__ = 'test' + submission.author.name = test_ignore_user + RedditDownloader._download_submission(downloader_mock, submission) + assert mock_function.call_count == 0 From 0eeb4b46dc70fe30f4e2c865ee6578245e5a30f9 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Wed, 24 Nov 2021 10:48:06 +1000 Subject: [PATCH 18/21] Remove bad test --- tests/test_downloader.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/tests/test_downloader.py b/tests/test_downloader.py index 0cc8dec..e5f0a31 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -200,20 +200,3 @@ def test_download_submission( RedditDownloader._download_submission(downloader_mock, submission) folder_contents = list(tmp_path.iterdir()) assert len(folder_contents) == expected_files_len - - -@pytest.mark.parametrize('test_ignore_user', ( - 'alice', -)) -def test_download_ignores_user( - test_ignore_user: str, - mock_function: MagicMock, - downloader_mock: MagicMock, -): - downloader_mock.args.ignore_user = test_ignore_user - submission = downloader_mock.reddit_instance.submission(id='m1hqw6') - mock_function.return_value = MagicMock() - mock_function.return_value.__name__ = 'test' - submission.author.name = test_ignore_user - RedditDownloader._download_submission(downloader_mock, submission) - assert mock_function.call_count == 0 From d0d72c82299be3d3a88a6eca950469ced4fce44b Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Wed, 24 Nov 2021 10:54:29 +1000 Subject: [PATCH 19/21] Add integration test for downloader option --- bdfr/downloader.py | 3 ++- .../test_download_integration.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/bdfr/downloader.py b/bdfr/downloader.py index edfd68e..028430f 100644 --- a/bdfr/downloader.py +++ b/bdfr/downloader.py @@ -53,7 +53,8 @@ class RedditDownloader(RedditConnector): return elif submission.author.name in self.args.ignore_user: logger.debug( - f'Submission {submission.id} in {submission.subreddit.display_name} by {submission.author.name} an ignored user') + f'Submission {submission.id} in {submission.subreddit.display_name} skipped' + f' due to {submission.author.name} being an ignored user') return elif not isinstance(submission, praw.models.Submission): logger.warning(f'{submission.id} is not a submission') diff --git a/tests/integration_tests/test_download_integration.py b/tests/integration_tests/test_download_integration.py index c2414ba..bd53382 100644 --- a/tests/integration_tests/test_download_integration.py +++ b/tests/integration_tests/test_download_integration.py @@ -337,3 +337,18 @@ def test_cli_download_include_id_file(tmp_path: Path): result = runner.invoke(cli, test_args) assert result.exit_code == 0 assert 'Downloaded submission' in result.output + + +@pytest.mark.online +@pytest.mark.reddit +@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') +@pytest.mark.parametrize('test_args', ( + ['--ignore-user', 'ArjanEgges', '-l', 'm3hxzd'], +)) +def test_cli_download_ignore_user(test_args: list[str], tmp_path: Path): + runner = CliRunner() + test_args = create_basic_args_for_download_runner(test_args, tmp_path) + result = runner.invoke(cli, test_args) + assert result.exit_code == 0 + assert 'Downloaded submission' not in result.output + assert 'being an ignored user' in result.output From f670b347ae94c2366da6fdab1f2a4e34eeb82249 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Wed, 24 Nov 2021 10:58:18 +1000 Subject: [PATCH 20/21] Add integration test for archiver option --- bdfr/archiver.py | 3 +++ .../integration_tests/test_archive_integration.py | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/bdfr/archiver.py b/bdfr/archiver.py index e51be57..a2e54e5 100644 --- a/bdfr/archiver.py +++ b/bdfr/archiver.py @@ -29,6 +29,9 @@ class Archiver(RedditConnector): for generator in self.reddit_lists: for submission in generator: if submission.author.name in self.args.ignore_user: + logger.debug( + f'Submission {submission.id} in {submission.subreddit.display_name} skipped' + f' due to {submission.author.name} being an ignored user') continue logger.debug(f'Attempting to archive submission {submission.id}') self.write_entry(submission) diff --git a/tests/integration_tests/test_archive_integration.py b/tests/integration_tests/test_archive_integration.py index 8cbb2d5..5ef04a6 100644 --- a/tests/integration_tests/test_archive_integration.py +++ b/tests/integration_tests/test_archive_integration.py @@ -106,3 +106,18 @@ def test_cli_archive_long(test_args: list[str], tmp_path: Path): result = runner.invoke(cli, test_args) assert result.exit_code == 0 assert re.search(r'Writing entry .*? to file in .*? format', result.output) + + +@pytest.mark.online +@pytest.mark.reddit +@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') +@pytest.mark.parametrize('test_args', ( + ['--ignore-user', 'ArjanEgges', '-l', 'm3hxzd'], +)) +def test_cli_archive_ignore_user(test_args: list[str], tmp_path: Path): + runner = CliRunner() + test_args = create_basic_args_for_archive_runner(test_args, tmp_path) + result = runner.invoke(cli, test_args) + assert result.exit_code == 0 + assert 'being an ignored user' in result.output + assert 'Attempting to archive submission' not in result.output From cc80acd6b5ad67c885d7faaa1656b3bc7abc421e Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Wed, 24 Nov 2021 13:06:07 +1000 Subject: [PATCH 21/21] Increase version number --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 94ae1de..e5e244b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,7 +4,7 @@ description_file = README.md description_content_type = text/markdown home_page = https://github.com/aliparlakci/bulk-downloader-for-reddit keywords = reddit, download, archive -version = 2.4.2 +version = 2.5.0 author = Ali Parlakci author_email = parlakciali@gmail.com maintainer = Serene Arc