Add ability to disable modules (#434)
* Fix test name to match standard * Rename file * Add ability to disable modules * Update README * Fix missing comma * Fix more missing commas. sigh... Co-authored-by: Ali Parlakçı <parlakciali@gmail.com>
This commit is contained in:
parent
434aeb8feb
commit
6dcef83666
21
README.md
21
README.md
|
@ -71,6 +71,10 @@ The following options are common between both the `archive` and `download` comma
|
|||
- `--config`
|
||||
- If the path to a configuration file is supplied with this option, the BDFR will use the specified config
|
||||
- See [Configuration Files](#configuration) for more details
|
||||
- `--disable-module`
|
||||
- Can be specified multiple times
|
||||
- Disables certain modules from being used
|
||||
- See [Disabling Modules](#disabling-modules) for more information and a list of module names
|
||||
- `--log`
|
||||
- This allows one to specify the location of the logfile
|
||||
- This must be done when running multiple instances of the BDFR, see [Multiple Instances](#multiple-instances) below
|
||||
|
@ -266,6 +270,7 @@ The following keys are optional, and defaults will be used if they cannot be fou
|
|||
- `backup_log_count`
|
||||
- `max_wait_time`
|
||||
- `time_format`
|
||||
- `disabled_modules`
|
||||
|
||||
All of these should not be modified unless you know what you're doing, as the default values will enable the BDFR to function just fine. A configuration is included in the BDFR when it is installed, and this will be placed in the configuration directory as the default.
|
||||
|
||||
|
@ -277,6 +282,22 @@ The option `time_format` will specify the format of the timestamp that replaces
|
|||
|
||||
The format can be specified through the [format codes](https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior) that are standard in the Python `datetime` library.
|
||||
|
||||
#### Disabling Modules
|
||||
|
||||
The individual modules of the BDFR, used to download submissions from websites, can be disabled. This is helpful especially in the case of the fallback downloaders, since the `--skip-domain` option cannot be effectively used in these cases. For example, the Youtube-DL downloader can retrieve data from hundreds of websites and domains; thus the only way to fully disable it is via the `--disable-module` option.
|
||||
|
||||
Modules can be disabled through the command line interface for the BDFR or more permanently in the configuration file via the `disabled_modules` option. The list of downloaders that can be disabled are the following. Note that they are case-insensitive.
|
||||
|
||||
- `Direct`
|
||||
- `Erome`
|
||||
- `Gallery` (Reddit Image Galleries)
|
||||
- `Gfycat`
|
||||
- `Imgur`
|
||||
- `Redgifs`
|
||||
- `SelfPost` (Reddit Text Post)
|
||||
- `Youtube`
|
||||
- `YoutubeDlFallback`
|
||||
|
||||
### Rate Limiting
|
||||
|
||||
The option `max_wait_time` has to do with retrying downloads. There are certain HTTP errors that mean that no amount of requests will return the wanted data, but some errors are from rate-limiting. This is when a single client is making so many requests that the remote website cuts the client off to preserve the function of the site. This is a common situation when downloading many resources from the same site. It is polite and best practice to obey the website's wishes in these cases.
|
||||
|
|
|
@ -14,19 +14,20 @@ logger = logging.getLogger()
|
|||
|
||||
_common_options = [
|
||||
click.argument('directory', type=str),
|
||||
click.option('--config', type=str, default=None),
|
||||
click.option('-v', '--verbose', default=None, count=True),
|
||||
click.option('-l', '--link', multiple=True, default=None, type=str),
|
||||
click.option('-s', '--subreddit', multiple=True, default=None, type=str),
|
||||
click.option('-m', '--multireddit', multiple=True, default=None, type=str),
|
||||
click.option('-L', '--limit', default=None, type=int),
|
||||
click.option('--authenticate', is_flag=True, default=None),
|
||||
click.option('--config', type=str, default=None),
|
||||
click.option('--disable-module', multiple=True, default=None, type=str),
|
||||
click.option('--log', type=str, default=None),
|
||||
click.option('--submitted', is_flag=True, default=None),
|
||||
click.option('--upvoted', is_flag=True, default=None),
|
||||
click.option('--saved', is_flag=True, default=None),
|
||||
click.option('--search', default=None, type=str),
|
||||
click.option('--submitted', is_flag=True, default=None),
|
||||
click.option('--time-format', type=str, default=None),
|
||||
click.option('--upvoted', is_flag=True, default=None),
|
||||
click.option('-L', '--limit', default=None, type=int),
|
||||
click.option('-l', '--link', multiple=True, default=None, type=str),
|
||||
click.option('-m', '--multireddit', multiple=True, default=None, type=str),
|
||||
click.option('-s', '--subreddit', multiple=True, default=None, type=str),
|
||||
click.option('-v', '--verbose', default=None, count=True),
|
||||
click.option('-u', '--user', type=str, multiple=True, default=None),
|
||||
click.option('-t', '--time', type=click.Choice(('all', 'hour', 'day', 'week', 'month', 'year')), default=None),
|
||||
click.option('-S', '--sort', type=click.Choice(('hot', 'top', 'new',
|
||||
|
|
|
@ -13,19 +13,21 @@ class Configuration(Namespace):
|
|||
self.authenticate = False
|
||||
self.config = None
|
||||
self.directory: str = '.'
|
||||
self.disable_module: list[str] = []
|
||||
self.exclude_id = []
|
||||
self.exclude_id_file = []
|
||||
self.file_scheme: str = '{REDDITOR}_{TITLE}_{POSTID}'
|
||||
self.folder_scheme: str = '{SUBREDDIT}'
|
||||
self.limit: Optional[int] = None
|
||||
self.link: list[str] = []
|
||||
self.log: Optional[str] = None
|
||||
self.make_hard_links = False
|
||||
self.max_wait_time = None
|
||||
self.multireddit: list[str] = []
|
||||
self.no_dupes: bool = False
|
||||
self.saved: bool = False
|
||||
self.search: Optional[str] = None
|
||||
self.search_existing: bool = False
|
||||
self.file_scheme: str = '{REDDITOR}_{TITLE}_{POSTID}'
|
||||
self.folder_scheme: str = '{SUBREDDIT}'
|
||||
self.skip: list[str] = []
|
||||
self.skip_domain: list[str] = []
|
||||
self.skip_subreddit: list[str] = []
|
||||
|
@ -37,7 +39,6 @@ class Configuration(Namespace):
|
|||
self.upvoted: bool = False
|
||||
self.user: list[str] = []
|
||||
self.verbose: int = 0
|
||||
self.make_hard_links = False
|
||||
|
||||
# Archiver-specific options
|
||||
self.format = 'json'
|
||||
|
|
|
@ -64,6 +64,8 @@ class RedditConnector(metaclass=ABCMeta):
|
|||
|
||||
self.read_config()
|
||||
|
||||
self.parse_disabled_modules()
|
||||
|
||||
self.download_filter = self.create_download_filter()
|
||||
logger.log(9, 'Created download filter')
|
||||
self.time_filter = self.create_time_filter()
|
||||
|
@ -99,10 +101,19 @@ class RedditConnector(metaclass=ABCMeta):
|
|||
option = 'ISO'
|
||||
logger.debug(f'Setting datetime format string to {option}')
|
||||
self.args.time_format = option
|
||||
if not self.args.disable_module:
|
||||
self.args.disable_module = [self.cfg_parser.get('DEFAULT', 'disabled_modules', fallback='')]
|
||||
# Update config on disk
|
||||
with open(self.config_location, 'w') as file:
|
||||
self.cfg_parser.write(file)
|
||||
|
||||
def parse_disabled_modules(self):
|
||||
disabled_modules = self.args.disable_module
|
||||
disabled_modules = self.split_args_input(disabled_modules)
|
||||
disabled_modules = set([name.strip().lower() for name in disabled_modules])
|
||||
self.args.disable_module = disabled_modules
|
||||
logger.debug(f'Disabling the following modules: {", ".join(self.args.disable_module)}')
|
||||
|
||||
def create_reddit_instance(self):
|
||||
if self.args.authenticate:
|
||||
logger.debug('Using authenticated Reddit instance')
|
||||
|
|
|
@ -63,7 +63,9 @@ class RedditDownloader(RedditConnector):
|
|||
except errors.NotADownloadableLinkError as e:
|
||||
logger.error(f'Could not download submission {submission.id}: {e}')
|
||||
return
|
||||
|
||||
if downloader_class.__name__.lower() in self.args.disable_module:
|
||||
logger.debug(f'Submission {submission.id} skipped due to disabled module {downloader_class.__name__}')
|
||||
return
|
||||
try:
|
||||
content = downloader.find_resources(self.authenticator)
|
||||
except errors.SiteDownloaderError as e:
|
||||
|
|
|
@ -313,7 +313,9 @@ def test_sanitise_subreddit_name(test_name: str, expected: str):
|
|||
(['test1,test2', 'test3'], {'test1', 'test2', 'test3'}),
|
||||
(['test1, test2', 'test3'], {'test1', 'test2', 'test3'}),
|
||||
(['test1; test2', 'test3'], {'test1', 'test2', 'test3'}),
|
||||
(['test1, test2', 'test1,test2,test3', 'test4'], {'test1', 'test2', 'test3', 'test4'})
|
||||
(['test1, test2', 'test1,test2,test3', 'test4'], {'test1', 'test2', 'test3', 'test4'}),
|
||||
([''], {''}),
|
||||
(['test'], {'test'}),
|
||||
))
|
||||
def test_split_subreddit_entries(test_subreddit_entries: list[str], expected: set[str]):
|
||||
results = RedditConnector.split_args_input(test_subreddit_entries)
|
||||
|
|
|
@ -348,7 +348,7 @@ def test_cli_download_subreddit_exclusion(test_args: list[str], tmp_path: Path):
|
|||
['--file-scheme', '{TITLE}'],
|
||||
['--file-scheme', '{TITLE}_test_{SUBREDDIT}'],
|
||||
))
|
||||
def test_cli_file_scheme_warning(test_args: list[str], tmp_path: Path):
|
||||
def test_cli_download_file_scheme_warning(test_args: list[str], tmp_path: Path):
|
||||
runner = CliRunner()
|
||||
test_args = create_basic_args_for_download_runner(test_args, tmp_path)
|
||||
result = runner.invoke(cli, test_args)
|
||||
|
@ -356,6 +356,23 @@ def test_cli_file_scheme_warning(test_args: list[str], tmp_path: Path):
|
|||
assert 'Some files might not be downloaded due to name conflicts' in result.output
|
||||
|
||||
|
||||
@pytest.mark.online
|
||||
@pytest.mark.reddit
|
||||
@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
|
||||
@pytest.mark.parametrize('test_args', (
|
||||
['-l', 'm2601g', '--disable-module', 'Direct'],
|
||||
['-l', 'nnb9vs', '--disable-module', 'YoutubeDlFallback'],
|
||||
['-l', 'nnb9vs', '--disable-module', 'youtubedlfallback'],
|
||||
))
|
||||
def test_cli_download_disable_modules(test_args: list[str], tmp_path: Path):
|
||||
runner = CliRunner()
|
||||
test_args = create_basic_args_for_download_runner(test_args, tmp_path)
|
||||
result = runner.invoke(cli, test_args)
|
||||
assert result.exit_code == 0
|
||||
assert 'skipped due to disabled module' in result.output
|
||||
assert 'Downloaded submission' not in result.output
|
||||
|
||||
|
||||
@pytest.mark.online
|
||||
@pytest.mark.reddit
|
||||
@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
|
||||
|
|
Loading…
Reference in a new issue