Add option to skip specified subreddits (#268)
* Rename variables
* Add option to skip specific subreddits
* Update README
This commit is contained in:
parent
c85ae3fc69
commit
d8752b15fa
|
@ -139,6 +139,10 @@ The following options apply only to the `download` command. This command downloa
|
|||
- `--skip`
|
||||
- This adds file types to the download filter i.e. submissions with one of the supplied file extensions will not be downloaded
|
||||
- Can be specified multiple times
|
||||
- `--skip-subreddit`
|
||||
- This skips all submissions from the specified subreddit
|
||||
- Can be specified multiple times
|
||||
- Also accepts CSV subreddit names
|
||||
|
||||
#### Archiver Options
|
||||
|
||||
|
|
|
@ -53,6 +53,7 @@ def cli():
|
|||
@click.option('--search-existing', is_flag=True, default=None)
|
||||
@click.option('--skip', default=None, multiple=True)
|
||||
@click.option('--skip-domain', default=None, multiple=True)
|
||||
@click.option('--skip-subreddit', default=None, multiple=True)
|
||||
@_add_common_options
|
||||
@click.pass_context
|
||||
def cli_download(context: click.Context, **_):
|
||||
|
|
|
@ -27,6 +27,7 @@ class Configuration(Namespace):
|
|||
self.folder_scheme: str = '{SUBREDDIT}'
|
||||
self.skip: list[str] = []
|
||||
self.skip_domain: list[str] = []
|
||||
self.skip_subreddit: list[str] = []
|
||||
self.sort: str = 'hot'
|
||||
self.submitted: bool = False
|
||||
self.subreddit: list[str] = []
|
||||
|
|
|
@ -93,6 +93,9 @@ class RedditDownloader:
|
|||
self.authenticator = self._create_authenticator()
|
||||
logger.log(9, 'Created site authenticator')
|
||||
|
||||
self.args.skip_subreddit = self._split_args_input(self.args.skip_subreddit)
|
||||
self.args.skip_subreddit = set([sub.lower() for sub in self.args.skip_subreddit])
|
||||
|
||||
def _read_config(self):
|
||||
"""Read any cfg values that need to be processed"""
|
||||
if self.args.max_wait_time is None:
|
||||
|
@ -210,13 +213,13 @@ class RedditDownloader:
|
|||
return match.group(1)
|
||||
|
||||
@staticmethod
|
||||
def _split_args_input(subreddit_entries: list[str]) -> set[str]:
|
||||
all_subreddits = []
|
||||
def _split_args_input(entries: list[str]) -> set[str]:
|
||||
all_entries = []
|
||||
split_pattern = re.compile(r'[,;]\s?')
|
||||
for entry in subreddit_entries:
|
||||
for entry in entries:
|
||||
results = re.split(split_pattern, entry)
|
||||
all_subreddits.extend([RedditDownloader._sanitise_subreddit_name(name) for name in results])
|
||||
return set(all_subreddits)
|
||||
all_entries.extend([RedditDownloader._sanitise_subreddit_name(name) for name in results])
|
||||
return set(all_entries)
|
||||
|
||||
def _get_subreddits(self) -> list[praw.models.ListingGenerator]:
|
||||
if self.args.subreddit:
|
||||
|
@ -354,8 +357,10 @@ class RedditDownloader:
|
|||
for generator in self.reddit_lists:
|
||||
for submission in generator:
|
||||
if submission.id in self.excluded_submission_ids:
|
||||
logger.debug(f'Submission {submission.id} in exclusion list, skipping')
|
||||
logger.debug(f'Object {submission.id} in exclusion list, skipping')
|
||||
continue
|
||||
elif submission.subreddit.display_name.lower() in self.args.skip_subreddit:
|
||||
logger.debug(f'Submission {submission.id} in {submission.subreddit.display_name} in skip list')
|
||||
else:
|
||||
logger.debug(f'Attempting to download submission {submission.id}')
|
||||
self._download_submission(submission)
|
||||
|
|
|
@ -284,6 +284,22 @@ def test_cli_download_links_exclusion(test_args: list[str], tmp_path: Path):
|
|||
assert 'Downloaded submission ' not in result.output
|
||||
|
||||
|
||||
@pytest.mark.online
|
||||
@pytest.mark.reddit
|
||||
@pytest.mark.skipif(Path('test_config.cfg') is False, reason='A test config file is required for integration tests')
|
||||
@pytest.mark.parametrize('test_args', (
|
||||
['-l', 'm2601g', '--skip-subreddit', 'trollxchromosomes'],
|
||||
['-s', 'trollxchromosomes', '--skip-subreddit', 'trollxchromosomes', '-L', '3'],
|
||||
))
|
||||
def test_cli_download_subreddit_exclusion(test_args: list[str], tmp_path: Path):
|
||||
runner = CliRunner()
|
||||
test_args = ['download', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
|
||||
result = runner.invoke(cli, test_args)
|
||||
assert result.exit_code == 0
|
||||
assert 'in skip list' in result.output
|
||||
assert 'Downloaded submission ' not in result.output
|
||||
|
||||
|
||||
@pytest.mark.online
|
||||
@pytest.mark.reddit
|
||||
@pytest.mark.skipif(Path('test_config.cfg') is False, reason='A test config file is required for integration tests')
|
||||
|
|
Loading…
Reference in a new issue