diff --git a/README.md b/README.md index f89ef23..9f32599 100644 --- a/README.md +++ b/README.md @@ -151,17 +151,17 @@ The following options apply only to the `download` command. This command downloa - Sets the scheme for folders - Default is `{SUBREDDIT}` - See [Folder and File Name Schemes](#folder-and-file-name-schemes) for more details -- `--skip-id` +- `--exclude-id` - This will skip the download of any submission with the ID provided - Can be specified multiple times -- `--skip-id-file` +- `--exclude-id-file` - This will skip the download of any submission with any of the IDs in the files provided - Can be specified multiple times - Format is one ID per line - `--skip-domain` - This adds domains to the download filter i.e. submissions coming from these domains will not be downloaded - Can be specified multiple times -- `--skip-format` +- `--skip` - This adds file types to the download filter i.e. submissions with one of the supplied file extensions will not be downloaded - Can be specified multiple times - `--skip-subreddit` diff --git a/bdfr/__main__.py b/bdfr/__main__.py index bafa93c..28ef207 100644 --- a/bdfr/__main__.py +++ b/bdfr/__main__.py @@ -51,9 +51,9 @@ def cli(): @click.option('--max-wait-time', type=int, default=None) @click.option('--no-dupes', is_flag=True, default=None) @click.option('--search-existing', is_flag=True, default=None) -@click.option('--skip-id', default=None, multiple=True) -@click.option('--skip-id-file', default=None, multiple=True) -@click.option('--skip-format', default=None, multiple=True) +@click.option('--exclude-id', default=None, multiple=True) +@click.option('--exclude-id-file', default=None, multiple=True) +@click.option('--skip', default=None, multiple=True) @click.option('--skip-domain', default=None, multiple=True) @click.option('--skip-subreddit', default=None, multiple=True) @_add_common_options diff --git a/bdfr/configuration.py b/bdfr/configuration.py index 8cb8f10..9ab9d45 100644 --- a/bdfr/configuration.py +++ b/bdfr/configuration.py @@ -13,8 +13,8 @@ class Configuration(Namespace): self.authenticate = False self.config = None self.directory: str = '.' - self.skip_id = [] - self.skip_id_file = [] + self.exclude_id = [] + self.exclude_id_file = [] self.limit: Optional[int] = None self.link: list[str] = [] self.log: Optional[str] = None @@ -26,7 +26,7 @@ class Configuration(Namespace): self.search_existing: bool = False self.file_scheme: str = '{REDDITOR}_{TITLE}_{POSTID}' self.folder_scheme: str = '{SUBREDDIT}' - self.skip_format: list[str] = [] + self.skip: list[str] = [] self.skip_domain: list[str] = [] self.skip_subreddit: list[str] = [] self.sort: str = 'hot' diff --git a/bdfr/connector.py b/bdfr/connector.py index 3dcc118..c20b749 100644 --- a/bdfr/connector.py +++ b/bdfr/connector.py @@ -367,7 +367,7 @@ class RedditConnector(metaclass=ABCMeta): return RedditTypes.SortType.HOT def create_download_filter(self) -> DownloadFilter: - return DownloadFilter(self.args.skip_format, self.args.skip_domain) + return DownloadFilter(self.args.skip, self.args.skip_domain) def create_authenticator(self) -> SiteAuthenticator: return SiteAuthenticator(self.cfg_parser) @@ -389,8 +389,8 @@ class RedditConnector(metaclass=ABCMeta): def read_excluded_ids(self) -> set[str]: out = [] - out.extend(self.args.skip_id) - for id_file in self.args.skip_id_file: + out.extend(self.args.exclude_id) + for id_file in self.args.exclude_id_file: id_file = Path(id_file).resolve().expanduser() if not id_file.exists(): logger.warning(f'ID exclusion file at {id_file} does not exist') diff --git a/scripts/README.md b/scripts/README.md index 51e51bb..4bb098b 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -9,7 +9,7 @@ Due to the verboseness of the logs, a great deal of information can be gathered ## Extract all Successfully Downloaded IDs -This script is contained [here](extract_successful_ids.sh) and will result in a file that contains the IDs of everything that was successfully downloaded without an error. That is, a list will be created of submissions that, with the `--skip-id-file` option, can be used so that the BDFR will not attempt to redownload these submissions/comments. This is likely to cause a performance increase, especially when the BDFR run finds many resources. +This script is contained [here](extract_successful_ids.sh) and will result in a file that contains the IDs of everything that was successfully downloaded without an error. That is, a list will be created of submissions that, with the `--exclude-id-file` option, can be used so that the BDFR will not attempt to redownload these submissions/comments. This is likely to cause a performance increase, especially when the BDFR run finds many resources. The script can be used with the following signature: diff --git a/tests/test_connector.py b/tests/test_connector.py index 41d9115..1078707 100644 --- a/tests/test_connector.py +++ b/tests/test_connector.py @@ -54,7 +54,7 @@ def test_determine_directories(tmp_path: Path, downloader_mock: MagicMock): (['.test'], ['test.com'],), )) def test_create_download_filter(skip_extensions: list[str], skip_domains: list[str], downloader_mock: MagicMock): - downloader_mock.args.skip_format = skip_extensions + downloader_mock.args.skip = skip_extensions downloader_mock.args.skip_domain = skip_domains result = RedditConnector.create_download_filter(downloader_mock) @@ -324,7 +324,7 @@ def test_split_subreddit_entries(test_subreddit_entries: list[str], expected: se def test_read_excluded_submission_ids_from_file(downloader_mock: MagicMock, tmp_path: Path): test_file = tmp_path / 'test.txt' test_file.write_text('aaaaaa\nbbbbbb') - downloader_mock.args.skip_id_file = [test_file] + downloader_mock.args.exclude_id_file = [test_file] results = RedditConnector.read_excluded_ids(downloader_mock) assert results == {'aaaaaa', 'bbbbbb'} diff --git a/tests/test_integration.py b/tests/test_integration.py index 419464f..7aec0eb 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -166,8 +166,8 @@ def test_cli_download_search_existing(test_args: list[str], tmp_path: Path): @pytest.mark.reddit @pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') @pytest.mark.parametrize('test_args', ( - ['--subreddit', 'tumblr', '-L', '25', '--skip-format', 'png', '--skip-format', 'jpg'], - ['--subreddit', 'MaliciousCompliance', '-L', '25', '--skip-format', 'txt'], + ['--subreddit', 'tumblr', '-L', '25', '--skip', 'png', '--skip', 'jpg'], + ['--subreddit', 'MaliciousCompliance', '-L', '25', '--skip', 'txt'], )) def test_cli_download_download_filters(test_args: list[str], tmp_path: Path): runner = CliRunner() @@ -299,7 +299,7 @@ def test_cli_download_use_default_config(tmp_path: Path): @pytest.mark.reddit @pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') @pytest.mark.parametrize('test_args', ( - ['-l', 'm2601g', '--skip-id', 'm2601g'], + ['-l', 'm2601g', '--exclude-id', 'm2601g'], )) def test_cli_download_links_exclusion(test_args: list[str], tmp_path: Path): runner = CliRunner()