1
0
Fork 0
mirror of synced 2024-05-19 11:42:40 +12:00

Add option to skip specified subreddits (#268)

* Rename variables

* Add option to skip specific subreddits

* Update README
This commit is contained in:
Serene 2021-04-17 19:56:43 +10:00 committed by Ali Parlakci
parent c85ae3fc69
commit d8752b15fa
5 changed files with 33 additions and 6 deletions

View file

@ -139,6 +139,10 @@ The following options apply only to the `download` command. This command downloa
- `--skip`
- This adds file types to the download filter i.e. submissions with one of the supplied file extensions will not be downloaded
- Can be specified multiple times
- `--skip-subreddit`
- This skips all submissions from the specified subreddit
- Can be specified multiple times
- Also accepts several subreddit names in a single value, separated by commas or semicolons
#### Archiver Options

View file

@ -53,6 +53,7 @@ def cli():
@click.option('--search-existing', is_flag=True, default=None)
@click.option('--skip', default=None, multiple=True)
@click.option('--skip-domain', default=None, multiple=True)
@click.option('--skip-subreddit', default=None, multiple=True)
@_add_common_options
@click.pass_context
def cli_download(context: click.Context, **_):

View file

@ -27,6 +27,7 @@ class Configuration(Namespace):
self.folder_scheme: str = '{SUBREDDIT}'
self.skip: list[str] = []
self.skip_domain: list[str] = []
self.skip_subreddit: list[str] = []
self.sort: str = 'hot'
self.submitted: bool = False
self.subreddit: list[str] = []

View file

@ -93,6 +93,9 @@ class RedditDownloader:
self.authenticator = self._create_authenticator()
logger.log(9, 'Created site authenticator')
self.args.skip_subreddit = self._split_args_input(self.args.skip_subreddit)
self.args.skip_subreddit = set([sub.lower() for sub in self.args.skip_subreddit])
def _read_config(self):
"""Read any cfg values that need to be processed"""
if self.args.max_wait_time is None:
@ -210,13 +213,13 @@ class RedditDownloader:
return match.group(1)
@staticmethod
def _split_args_input(entries: list[str]) -> set[str]:
    """Split raw option values into a set of individual, sanitised names.

    Each entry may contain several names separated by a comma or a
    semicolon, optionally followed by a single whitespace character.
    Every resulting piece is passed through
    ``RedditDownloader._sanitise_subreddit_name``; duplicates collapse
    because the result is returned as a set.

    Args:
        entries: The raw strings as supplied by the user, one per
            occurrence of the option.

    Returns:
        The set of sanitised names extracted from all entries.
    """
    all_entries = []
    split_pattern = re.compile(r'[,;]\s?')
    for entry in entries:
        results = re.split(split_pattern, entry)
        all_entries.extend([RedditDownloader._sanitise_subreddit_name(name) for name in results])
    return set(all_entries)
def _get_subreddits(self) -> list[praw.models.ListingGenerator]:
if self.args.subreddit:
@ -354,8 +357,10 @@ class RedditDownloader:
for generator in self.reddit_lists:
for submission in generator:
if submission.id in self.excluded_submission_ids:
logger.debug(f'Submission {submission.id} in exclusion list, skipping')
logger.debug(f'Object {submission.id} in exclusion list, skipping')
continue
elif submission.subreddit.display_name.lower() in self.args.skip_subreddit:
logger.debug(f'Submission {submission.id} in {submission.subreddit.display_name} in skip list')
else:
logger.debug(f'Attempting to download submission {submission.id}')
self._download_submission(submission)

View file

@ -284,6 +284,22 @@ def test_cli_download_links_exclusion(test_args: list[str], tmp_path: Path):
assert 'Downloaded submission ' not in result.output
@pytest.mark.online
@pytest.mark.reddit
# A Path object is never the singleton ``False``, so ``Path(...) is False``
# could never trigger the skip; check whether the file actually exists.
@pytest.mark.skipif(not Path('test_config.cfg').exists(), reason='A test config file is required for integration tests')
@pytest.mark.parametrize('test_args', (
    ['-l', 'm2601g', '--skip-subreddit', 'trollxchromosomes'],
    ['-s', 'trollxchromosomes', '--skip-subreddit', 'trollxchromosomes', '-L', '3'],
))
def test_cli_download_subreddit_exclusion(test_args: list[str], tmp_path: Path):
    """Check that submissions from a skipped subreddit are not downloaded.

    Runs the ``download`` command with ``--skip-subreddit`` and asserts that
    the run succeeds, that the skip message is logged, and that no
    submission is reported as downloaded.
    """
    runner = CliRunner()
    test_args = ['download', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
    result = runner.invoke(cli, test_args)
    assert result.exit_code == 0
    assert 'in skip list' in result.output
    assert 'Downloaded submission ' not in result.output
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.skipif(Path('test_config.cfg') is False, reason='A test config file is required for integration tests')