diff --git a/README.md b/README.md index fd5a3b7..405cf84 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,10 @@ The following options are common between both the `archive` and `download` comma - `--config` - If the path to a configuration file is supplied with this option, the BDFR will use the specified config - See [Configuration Files](#configuration) for more details +- `--disable-module` + - Can be specified multiple times + - Disables certain modules from being used + - See [Disabling Modules](#disabling-modules) for more information and a list of module names - `--log` - This allows one to specify the location of the logfile - This must be done when running multiple instances of the BDFR, see [Multiple Instances](#multiple-instances) below @@ -266,6 +270,7 @@ The following keys are optional, and defaults will be used if they cannot be fou - `backup_log_count` - `max_wait_time` - `time_format` + - `disabled_modules` All of these should not be modified unless you know what you're doing, as the default values will enable the BDFR to function just fine. A configuration is included in the BDFR when it is installed, and this will be placed in the configuration directory as the default. @@ -277,6 +282,22 @@ The option `time_format` will specify the format of the timestamp that replaces The format can be specified through the [format codes](https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior) that are standard in the Python `datetime` library. +#### Disabling Modules + +The individual modules of the BDFR, used to download submissions from websites, can be disabled. This is helpful especially in the case of the fallback downloaders, since the `--skip-domain` option cannot be effectively used in these cases. For example, the Youtube-DL downloader can retrieve data from hundreds of websites and domains; thus the only way to fully disable it is via the `--disable-module` option. + +Modules can be disabled through the command line interface for the BDFR or more permanently in the configuration file via the `disabled_modules` option. The list of downloaders that can be disabled are the following. Note that they are case-insensitive. + +- `Direct` +- `Erome` +- `Gallery` (Reddit Image Galleries) +- `Gfycat` +- `Imgur` +- `Redgifs` +- `SelfPost` (Reddit Text Post) +- `Youtube` +- `YoutubeDlFallback` + ### Rate Limiting The option `max_wait_time` has to do with retrying downloads. There are certain HTTP errors that mean that no amount of requests will return the wanted data, but some errors are from rate-limiting. This is when a single client is making so many requests that the remote website cuts the client off to preserve the function of the site. This is a common situation when downloading many resources from the same site. It is polite and best practice to obey the website's wishes in these cases. diff --git a/bdfr/__main__.py b/bdfr/__main__.py index 0d299c9..1103581 100644 --- a/bdfr/__main__.py +++ b/bdfr/__main__.py @@ -14,19 +14,20 @@ logger = logging.getLogger() _common_options = [ click.argument('directory', type=str), - click.option('--config', type=str, default=None), - click.option('-v', '--verbose', default=None, count=True), - click.option('-l', '--link', multiple=True, default=None, type=str), - click.option('-s', '--subreddit', multiple=True, default=None, type=str), - click.option('-m', '--multireddit', multiple=True, default=None, type=str), - click.option('-L', '--limit', default=None, type=int), click.option('--authenticate', is_flag=True, default=None), + click.option('--config', type=str, default=None), + click.option('--disable-module', multiple=True, default=None, type=str), click.option('--log', type=str, default=None), - click.option('--submitted', is_flag=True, default=None), - click.option('--upvoted', is_flag=True, default=None), click.option('--saved', is_flag=True, default=None), click.option('--search', default=None, type=str), + click.option('--submitted', is_flag=True, default=None), click.option('--time-format', type=str, default=None), + click.option('--upvoted', is_flag=True, default=None), + click.option('-L', '--limit', default=None, type=int), + click.option('-l', '--link', multiple=True, default=None, type=str), + click.option('-m', '--multireddit', multiple=True, default=None, type=str), + click.option('-s', '--subreddit', multiple=True, default=None, type=str), + click.option('-v', '--verbose', default=None, count=True), click.option('-u', '--user', type=str, multiple=True, default=None), click.option('-t', '--time', type=click.Choice(('all', 'hour', 'day', 'week', 'month', 'year')), default=None), click.option('-S', '--sort', type=click.Choice(('hot', 'top', 'new', diff --git a/bdfr/configuration.py b/bdfr/configuration.py index 446bc82..327a453 100644 --- a/bdfr/configuration.py +++ b/bdfr/configuration.py @@ -13,19 +13,21 @@ class Configuration(Namespace): self.authenticate = False self.config = None self.directory: str = '.' + self.disable_module: list[str] = [] self.exclude_id = [] self.exclude_id_file = [] + self.file_scheme: str = '{REDDITOR}_{TITLE}_{POSTID}' + self.folder_scheme: str = '{SUBREDDIT}' self.limit: Optional[int] = None self.link: list[str] = [] self.log: Optional[str] = None + self.make_hard_links = False self.max_wait_time = None self.multireddit: list[str] = [] self.no_dupes: bool = False self.saved: bool = False self.search: Optional[str] = None self.search_existing: bool = False - self.file_scheme: str = '{REDDITOR}_{TITLE}_{POSTID}' - self.folder_scheme: str = '{SUBREDDIT}' self.skip: list[str] = [] self.skip_domain: list[str] = [] self.skip_subreddit: list[str] = [] @@ -37,7 +39,6 @@ class Configuration(Namespace): self.upvoted: bool = False self.user: list[str] = [] self.verbose: int = 0 - self.make_hard_links = False # Archiver-specific options self.format = 'json' diff --git a/bdfr/connector.py b/bdfr/connector.py index 6aec2f5..68efc0c 100644 --- a/bdfr/connector.py +++ b/bdfr/connector.py @@ -64,6 +64,8 @@ class RedditConnector(metaclass=ABCMeta): self.read_config() + self.parse_disabled_modules() + self.download_filter = self.create_download_filter() logger.log(9, 'Created download filter') self.time_filter = self.create_time_filter() @@ -99,10 +101,19 @@ class RedditConnector(metaclass=ABCMeta): option = 'ISO' logger.debug(f'Setting datetime format string to {option}') self.args.time_format = option + if not self.args.disable_module: + self.args.disable_module = [self.cfg_parser.get('DEFAULT', 'disabled_modules', fallback='')] # Update config on disk with open(self.config_location, 'w') as file: self.cfg_parser.write(file) + def parse_disabled_modules(self): + disabled_modules = self.args.disable_module + disabled_modules = self.split_args_input(disabled_modules) + disabled_modules = set([name.strip().lower() for name in disabled_modules]) + self.args.disable_module = disabled_modules + logger.debug(f'Disabling the following modules: {", ".join(self.args.disable_module)}') + def create_reddit_instance(self): if self.args.authenticate: logger.debug('Using authenticated Reddit instance') diff --git a/bdfr/downloader.py b/bdfr/downloader.py index 3b2c581..61158a3 100644 --- a/bdfr/downloader.py +++ b/bdfr/downloader.py @@ -63,7 +63,9 @@ class RedditDownloader(RedditConnector): except errors.NotADownloadableLinkError as e: logger.error(f'Could not download submission {submission.id}: {e}') return - + if downloader_class.__name__.lower() in self.args.disable_module: + logger.debug(f'Submission {submission.id} skipped due to disabled module {downloader_class.__name__}') + return try: content = downloader.find_resources(self.authenticator) except errors.SiteDownloaderError as e: diff --git a/tests/site_downloaders/fallback_downloaders/youtubedl_fallback.py b/tests/site_downloaders/fallback_downloaders/test_youtubedl_fallback.py similarity index 100% rename from tests/site_downloaders/fallback_downloaders/youtubedl_fallback.py rename to tests/site_downloaders/fallback_downloaders/test_youtubedl_fallback.py diff --git a/tests/test_connector.py b/tests/test_connector.py index 03d2668..2249b96 100644 --- a/tests/test_connector.py +++ b/tests/test_connector.py @@ -313,7 +313,9 @@ def test_sanitise_subreddit_name(test_name: str, expected: str): (['test1,test2', 'test3'], {'test1', 'test2', 'test3'}), (['test1, test2', 'test3'], {'test1', 'test2', 'test3'}), (['test1; test2', 'test3'], {'test1', 'test2', 'test3'}), - (['test1, test2', 'test1,test2,test3', 'test4'], {'test1', 'test2', 'test3', 'test4'}) + (['test1, test2', 'test1,test2,test3', 'test4'], {'test1', 'test2', 'test3', 'test4'}), + ([''], {''}), + (['test'], {'test'}), )) def test_split_subreddit_entries(test_subreddit_entries: list[str], expected: set[str]): results = RedditConnector.split_args_input(test_subreddit_entries) diff --git a/tests/test_integration.py b/tests/test_integration.py index ed67f03..6a9e52b 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -348,7 +348,7 @@ def test_cli_download_subreddit_exclusion(test_args: list[str], tmp_path: Path): ['--file-scheme', '{TITLE}'], ['--file-scheme', '{TITLE}_test_{SUBREDDIT}'], )) -def test_cli_file_scheme_warning(test_args: list[str], tmp_path: Path): +def test_cli_download_file_scheme_warning(test_args: list[str], tmp_path: Path): runner = CliRunner() test_args = create_basic_args_for_download_runner(test_args, tmp_path) result = runner.invoke(cli, test_args) @@ -356,6 +356,23 @@ def test_cli_file_scheme_warning(test_args: list[str], tmp_path: Path): assert 'Some files might not be downloaded due to name conflicts' in result.output +@pytest.mark.online +@pytest.mark.reddit +@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') +@pytest.mark.parametrize('test_args', ( + ['-l', 'm2601g', '--disable-module', 'Direct'], + ['-l', 'nnb9vs', '--disable-module', 'YoutubeDlFallback'], + ['-l', 'nnb9vs', '--disable-module', 'youtubedlfallback'], +)) +def test_cli_download_disable_modules(test_args: list[str], tmp_path: Path): + runner = CliRunner() + test_args = create_basic_args_for_download_runner(test_args, tmp_path) + result = runner.invoke(cli, test_args) + assert result.exit_code == 0 + assert 'skipped due to disabled module' in result.output + assert 'Downloaded submission' not in result.output + + @pytest.mark.online @pytest.mark.reddit @pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')