Add ability to disable modules (#434)

* Fix test name to match standard * Rename file * Add ability to disable modules * Update README * Fix missing comma * Fix more missing commas. sigh... Co-authored-by: Ali Parlakçı <parlakciali@gmail.com>
2024-05-19 11:42:40 +12:00 · 2021-06-06 20:47:56 +10:00 · 2021-06-06 20:47:56 +10:00 · 6dcef83666
parent 434aeb8feb
commit 6dcef83666
8 changed files with 69 additions and 14 deletions
--- a/README.md
+++ b/README.md
@ -71,6 +71,10 @@ The following options are common between both the `archive` and `download` comma
 - `--config`
  - If the path to a configuration file is supplied with this option, the BDFR will use the specified config
  - See [Configuration Files](#configuration) for more details
+- `--disable-module`
+  - Can be specified multiple times
+  - Disables certain modules from being used
+  - See [Disabling Modules](#disabling-modules) for more information and a list of module names
 - `--log`
  - This allows one to specify the location of the logfile
  - This must be done when running multiple instances of the BDFR, see [Multiple Instances](#multiple-instances) below
@ -266,6 +270,7 @@ The following keys are optional, and defaults will be used if they cannot be fou
  - `backup_log_count`
  - `max_wait_time`
  - `time_format`
+  - `disabled_modules`

 All of these should not be modified unless you know what you're doing, as the default values will enable the BDFR to function just fine. A configuration is included in the BDFR when it is installed, and this will be placed in the configuration directory as the default.

@ -277,6 +282,22 @@ The option `time_format` will specify the format of the timestamp that replaces

 The format can be specified through the [format codes](https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior) that are standard in the Python `datetime` library.

+#### Disabling Modules
+
+The individual modules of the BDFR, used to download submissions from websites, can be disabled. This is helpful especially in the case of the fallback downloaders, since the `--skip-domain` option cannot be effectively used in these cases. For example, the Youtube-DL downloader can retrieve data from hundreds of websites and domains; thus the only way to fully disable it is via the `--disable-module` option.
+
+Modules can be disabled through the command line interface for the BDFR or more permanently in the configuration file via the `disabled_modules` option. The list of downloaders that can be disabled are the following. Note that they are case-insensitive.
+
+- `Direct`
+- `Erome`
+- `Gallery` (Reddit Image Galleries)
+- `Gfycat`
+- `Imgur`
+- `Redgifs`
+- `SelfPost` (Reddit Text Post)
+- `Youtube`
+- `YoutubeDlFallback`
+
 ### Rate Limiting

 The option `max_wait_time` has to do with retrying downloads. There are certain HTTP errors that mean that no amount of requests will return the wanted data, but some errors are from rate-limiting. This is when a single client is making so many requests that the remote website cuts the client off to preserve the function of the site. This is a common situation when downloading many resources from the same site. It is polite and best practice to obey the website's wishes in these cases.
--- a/bdfr/main.py
+++ b/bdfr/main.py
@ -14,19 +14,20 @@ logger = logging.getLogger()

 _common_options = [
    click.argument('directory', type=str),
-    click.option('--config', type=str, default=None),
-    click.option('-v', '--verbose', default=None, count=True),
-    click.option('-l', '--link', multiple=True, default=None, type=str),
-    click.option('-s', '--subreddit', multiple=True, default=None, type=str),
-    click.option('-m', '--multireddit', multiple=True, default=None, type=str),
-    click.option('-L', '--limit', default=None, type=int),
    click.option('--authenticate', is_flag=True, default=None),
+    click.option('--config', type=str, default=None),
+    click.option('--disable-module', multiple=True, default=None, type=str),
    click.option('--log', type=str, default=None),
-    click.option('--submitted', is_flag=True, default=None),
-    click.option('--upvoted', is_flag=True, default=None),
    click.option('--saved', is_flag=True, default=None),
    click.option('--search', default=None, type=str),
+    click.option('--submitted', is_flag=True, default=None),
    click.option('--time-format', type=str, default=None),
+    click.option('--upvoted', is_flag=True, default=None),
+    click.option('-L', '--limit', default=None, type=int),
+    click.option('-l', '--link', multiple=True, default=None, type=str),
+    click.option('-m', '--multireddit', multiple=True, default=None, type=str),
+    click.option('-s', '--subreddit', multiple=True, default=None, type=str),
+    click.option('-v', '--verbose', default=None, count=True),
    click.option('-u', '--user', type=str, multiple=True, default=None),
    click.option('-t', '--time', type=click.Choice(('all', 'hour', 'day', 'week', 'month', 'year')), default=None),
    click.option('-S', '--sort', type=click.Choice(('hot', 'top', 'new',
--- a/bdfr/configuration.py
+++ b/bdfr/configuration.py
@ -13,19 +13,21 @@ class Configuration(Namespace):
        self.authenticate = False
        self.config = None
        self.directory: str = '.'
+        self.disable_module: list[str] = []
        self.exclude_id = []
        self.exclude_id_file = []
+        self.file_scheme: str = '{REDDITOR}_{TITLE}_{POSTID}'
+        self.folder_scheme: str = '{SUBREDDIT}'
        self.limit: Optional[int] = None
        self.link: list[str] = []
        self.log: Optional[str] = None
+        self.make_hard_links = False
        self.max_wait_time = None
        self.multireddit: list[str] = []
        self.no_dupes: bool = False
        self.saved: bool = False
        self.search: Optional[str] = None
        self.search_existing: bool = False
-        self.file_scheme: str = '{REDDITOR}_{TITLE}_{POSTID}'
-        self.folder_scheme: str = '{SUBREDDIT}'
        self.skip: list[str] = []
        self.skip_domain: list[str] = []
        self.skip_subreddit: list[str] = []
@ -37,7 +39,6 @@ class Configuration(Namespace):
        self.upvoted: bool = False
        self.user: list[str] = []
        self.verbose: int = 0
-        self.make_hard_links = False

        # Archiver-specific options
        self.format = 'json'
--- a/bdfr/connector.py
+++ b/bdfr/connector.py
@ -64,6 +64,8 @@ class RedditConnector(metaclass=ABCMeta):

        self.read_config()

+        self.parse_disabled_modules()
+
        self.download_filter = self.create_download_filter()
        logger.log(9, 'Created download filter')
        self.time_filter = self.create_time_filter()
@ -99,10 +101,19 @@ class RedditConnector(metaclass=ABCMeta):
                option = 'ISO'
            logger.debug(f'Setting datetime format string to {option}')
            self.args.time_format = option
+        if not self.args.disable_module:
+            self.args.disable_module = [self.cfg_parser.get('DEFAULT', 'disabled_modules', fallback='')]
        # Update config on disk
        with open(self.config_location, 'w') as file:
            self.cfg_parser.write(file)

+    def parse_disabled_modules(self):
+        disabled_modules = self.args.disable_module
+        disabled_modules = self.split_args_input(disabled_modules)
+        disabled_modules = set([name.strip().lower() for name in disabled_modules])
+        self.args.disable_module = disabled_modules
+        logger.debug(f'Disabling the following modules: {", ".join(self.args.disable_module)}')
+
    def create_reddit_instance(self):
        if self.args.authenticate:
            logger.debug('Using authenticated Reddit instance')
--- a/bdfr/downloader.py
+++ b/bdfr/downloader.py
@ -63,7 +63,9 @@ class RedditDownloader(RedditConnector):
        except errors.NotADownloadableLinkError as e:
            logger.error(f'Could not download submission {submission.id}: {e}')
            return
-
+        if downloader_class.__name__.lower() in self.args.disable_module:
+            logger.debug(f'Submission {submission.id} skipped due to disabled module {downloader_class.__name__}')
+            return
        try:
            content = downloader.find_resources(self.authenticator)
        except errors.SiteDownloaderError as e:
--- a/tests/site_downloaders/fallback_downloaders/test_youtubedl_fallback.py
+++ b/tests/site_downloaders/fallback_downloaders/test_youtubedl_fallback.py
--- a/tests/test_connector.py
+++ b/tests/test_connector.py
@ -313,7 +313,9 @@ def test_sanitise_subreddit_name(test_name: str, expected: str):
    (['test1,test2', 'test3'], {'test1', 'test2', 'test3'}),
    (['test1, test2', 'test3'], {'test1', 'test2', 'test3'}),
    (['test1; test2', 'test3'], {'test1', 'test2', 'test3'}),
-    (['test1, test2', 'test1,test2,test3', 'test4'], {'test1', 'test2', 'test3', 'test4'})
+    (['test1, test2', 'test1,test2,test3', 'test4'], {'test1', 'test2', 'test3', 'test4'}),
+    ([''], {''}),
+    (['test'], {'test'}),
 ))
 def test_split_subreddit_entries(test_subreddit_entries: list[str], expected: set[str]):
    results = RedditConnector.split_args_input(test_subreddit_entries)
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@ -348,7 +348,7 @@ def test_cli_download_subreddit_exclusion(test_args: list[str], tmp_path: Path):
    ['--file-scheme', '{TITLE}'],
    ['--file-scheme', '{TITLE}_test_{SUBREDDIT}'],
 ))
-def test_cli_file_scheme_warning(test_args: list[str], tmp_path: Path):
+def test_cli_download_file_scheme_warning(test_args: list[str], tmp_path: Path):
    runner = CliRunner()
    test_args = create_basic_args_for_download_runner(test_args, tmp_path)
    result = runner.invoke(cli, test_args)
@ -356,6 +356,23 @@ def test_cli_file_scheme_warning(test_args: list[str], tmp_path: Path):
    assert 'Some files might not be downloaded due to name conflicts' in result.output


+@pytest.mark.online
+@pytest.mark.reddit
+@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
+@pytest.mark.parametrize('test_args', (
+    ['-l', 'm2601g', '--disable-module', 'Direct'],
+    ['-l', 'nnb9vs', '--disable-module', 'YoutubeDlFallback'],
+    ['-l', 'nnb9vs', '--disable-module', 'youtubedlfallback'],
+))
+def test_cli_download_disable_modules(test_args: list[str], tmp_path: Path):
+    runner = CliRunner()
+    test_args = create_basic_args_for_download_runner(test_args, tmp_path)
+    result = runner.invoke(cli, test_args)
+    assert result.exit_code == 0
+    assert 'skipped due to disabled module' in result.output
+    assert 'Downloaded submission' not in result.output
+
+
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')