1
0
Fork 0
mirror of synced 2024-05-19 11:42:40 +12:00

Add ability to disable modules (#434)

* Fix test name to match standard

* Rename file

* Add ability to disable modules

* Update README

* Fix missing comma

* Fix more missing commas. sigh...

Co-authored-by: Ali Parlakçı <parlakciali@gmail.com>
This commit is contained in:
Serene 2021-06-06 20:47:56 +10:00 committed by GitHub
parent 434aeb8feb
commit 6dcef83666
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 69 additions and 14 deletions

View file

@ -71,6 +71,10 @@ The following options are common between both the `archive` and `download` comma
- `--config`
- If the path to a configuration file is supplied with this option, the BDFR will use the specified config
- See [Configuration Files](#configuration) for more details
- `--disable-module`
- Can be specified multiple times
- Disables certain modules from being used
- See [Disabling Modules](#disabling-modules) for more information and a list of module names
- `--log`
- This allows one to specify the location of the logfile
- This must be done when running multiple instances of the BDFR, see [Multiple Instances](#multiple-instances) below
@ -266,6 +270,7 @@ The following keys are optional, and defaults will be used if they cannot be fou
- `backup_log_count`
- `max_wait_time`
- `time_format`
- `disabled_modules`
All of these should not be modified unless you know what you're doing, as the default values will enable the BDFR to function just fine. A configuration is included in the BDFR when it is installed, and this will be placed in the configuration directory as the default.
@ -277,6 +282,22 @@ The option `time_format` will specify the format of the timestamp that replaces
The format can be specified through the [format codes](https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior) that are standard in the Python `datetime` library.
#### Disabling Modules
The individual modules of the BDFR, used to download submissions from websites, can be disabled. This is helpful especially in the case of the fallback downloaders, since the `--skip-domain` option cannot be effectively used in these cases. For example, the Youtube-DL downloader can retrieve data from hundreds of websites and domains; thus the only way to fully disable it is via the `--disable-module` option.
Modules can be disabled through the command line interface for the BDFR or more permanently in the configuration file via the `disabled_modules` option. The list of downloaders that can be disabled are the following. Note that they are case-insensitive.
- `Direct`
- `Erome`
- `Gallery` (Reddit Image Galleries)
- `Gfycat`
- `Imgur`
- `Redgifs`
- `SelfPost` (Reddit Text Post)
- `Youtube`
- `YoutubeDlFallback`
### Rate Limiting
The option `max_wait_time` has to do with retrying downloads. There are certain HTTP errors that mean that no amount of requests will return the wanted data, but some errors are from rate-limiting. This is when a single client is making so many requests that the remote website cuts the client off to preserve the function of the site. This is a common situation when downloading many resources from the same site. It is polite and best practice to obey the website's wishes in these cases.

View file

@ -14,19 +14,20 @@ logger = logging.getLogger()
_common_options = [
click.argument('directory', type=str),
click.option('--config', type=str, default=None),
click.option('-v', '--verbose', default=None, count=True),
click.option('-l', '--link', multiple=True, default=None, type=str),
click.option('-s', '--subreddit', multiple=True, default=None, type=str),
click.option('-m', '--multireddit', multiple=True, default=None, type=str),
click.option('-L', '--limit', default=None, type=int),
click.option('--authenticate', is_flag=True, default=None),
click.option('--config', type=str, default=None),
click.option('--disable-module', multiple=True, default=None, type=str),
click.option('--log', type=str, default=None),
click.option('--submitted', is_flag=True, default=None),
click.option('--upvoted', is_flag=True, default=None),
click.option('--saved', is_flag=True, default=None),
click.option('--search', default=None, type=str),
click.option('--submitted', is_flag=True, default=None),
click.option('--time-format', type=str, default=None),
click.option('--upvoted', is_flag=True, default=None),
click.option('-L', '--limit', default=None, type=int),
click.option('-l', '--link', multiple=True, default=None, type=str),
click.option('-m', '--multireddit', multiple=True, default=None, type=str),
click.option('-s', '--subreddit', multiple=True, default=None, type=str),
click.option('-v', '--verbose', default=None, count=True),
click.option('-u', '--user', type=str, multiple=True, default=None),
click.option('-t', '--time', type=click.Choice(('all', 'hour', 'day', 'week', 'month', 'year')), default=None),
click.option('-S', '--sort', type=click.Choice(('hot', 'top', 'new',

View file

@ -13,19 +13,21 @@ class Configuration(Namespace):
self.authenticate = False
self.config = None
self.directory: str = '.'
self.disable_module: list[str] = []
self.exclude_id = []
self.exclude_id_file = []
self.file_scheme: str = '{REDDITOR}_{TITLE}_{POSTID}'
self.folder_scheme: str = '{SUBREDDIT}'
self.limit: Optional[int] = None
self.link: list[str] = []
self.log: Optional[str] = None
self.make_hard_links = False
self.max_wait_time = None
self.multireddit: list[str] = []
self.no_dupes: bool = False
self.saved: bool = False
self.search: Optional[str] = None
self.search_existing: bool = False
self.file_scheme: str = '{REDDITOR}_{TITLE}_{POSTID}'
self.folder_scheme: str = '{SUBREDDIT}'
self.skip: list[str] = []
self.skip_domain: list[str] = []
self.skip_subreddit: list[str] = []
@ -37,7 +39,6 @@ class Configuration(Namespace):
self.upvoted: bool = False
self.user: list[str] = []
self.verbose: int = 0
self.make_hard_links = False
# Archiver-specific options
self.format = 'json'

View file

@ -64,6 +64,8 @@ class RedditConnector(metaclass=ABCMeta):
self.read_config()
self.parse_disabled_modules()
self.download_filter = self.create_download_filter()
logger.log(9, 'Created download filter')
self.time_filter = self.create_time_filter()
@ -99,10 +101,19 @@ class RedditConnector(metaclass=ABCMeta):
option = 'ISO'
logger.debug(f'Setting datetime format string to {option}')
self.args.time_format = option
if not self.args.disable_module:
self.args.disable_module = [self.cfg_parser.get('DEFAULT', 'disabled_modules', fallback='')]
# Update config on disk
with open(self.config_location, 'w') as file:
self.cfg_parser.write(file)
def parse_disabled_modules(self):
disabled_modules = self.args.disable_module
disabled_modules = self.split_args_input(disabled_modules)
disabled_modules = set([name.strip().lower() for name in disabled_modules])
self.args.disable_module = disabled_modules
logger.debug(f'Disabling the following modules: {", ".join(self.args.disable_module)}')
def create_reddit_instance(self):
if self.args.authenticate:
logger.debug('Using authenticated Reddit instance')

View file

@ -63,7 +63,9 @@ class RedditDownloader(RedditConnector):
except errors.NotADownloadableLinkError as e:
logger.error(f'Could not download submission {submission.id}: {e}')
return
if downloader_class.__name__.lower() in self.args.disable_module:
logger.debug(f'Submission {submission.id} skipped due to disabled module {downloader_class.__name__}')
return
try:
content = downloader.find_resources(self.authenticator)
except errors.SiteDownloaderError as e:

View file

@ -313,7 +313,9 @@ def test_sanitise_subreddit_name(test_name: str, expected: str):
(['test1,test2', 'test3'], {'test1', 'test2', 'test3'}),
(['test1, test2', 'test3'], {'test1', 'test2', 'test3'}),
(['test1; test2', 'test3'], {'test1', 'test2', 'test3'}),
(['test1, test2', 'test1,test2,test3', 'test4'], {'test1', 'test2', 'test3', 'test4'})
(['test1, test2', 'test1,test2,test3', 'test4'], {'test1', 'test2', 'test3', 'test4'}),
([''], {''}),
(['test'], {'test'}),
))
def test_split_subreddit_entries(test_subreddit_entries: list[str], expected: set[str]):
results = RedditConnector.split_args_input(test_subreddit_entries)

View file

@ -348,7 +348,7 @@ def test_cli_download_subreddit_exclusion(test_args: list[str], tmp_path: Path):
['--file-scheme', '{TITLE}'],
['--file-scheme', '{TITLE}_test_{SUBREDDIT}'],
))
def test_cli_file_scheme_warning(test_args: list[str], tmp_path: Path):
def test_cli_download_file_scheme_warning(test_args: list[str], tmp_path: Path):
runner = CliRunner()
test_args = create_basic_args_for_download_runner(test_args, tmp_path)
result = runner.invoke(cli, test_args)
@ -356,6 +356,23 @@ def test_cli_file_scheme_warning(test_args: list[str], tmp_path: Path):
assert 'Some files might not be downloaded due to name conflicts' in result.output
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
@pytest.mark.parametrize('test_args', (
['-l', 'm2601g', '--disable-module', 'Direct'],
['-l', 'nnb9vs', '--disable-module', 'YoutubeDlFallback'],
['-l', 'nnb9vs', '--disable-module', 'youtubedlfallback'],
))
def test_cli_download_disable_modules(test_args: list[str], tmp_path: Path):
runner = CliRunner()
test_args = create_basic_args_for_download_runner(test_args, tmp_path)
result = runner.invoke(cli, test_args)
assert result.exit_code == 0
assert 'skipped due to disabled module' in result.output
assert 'Downloaded submission' not in result.output
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')