Merge pull request #634 from chapmanjacobd/patch-4
This commit is contained in:
commit
cd6bcd82ef
|
@ -227,6 +227,15 @@ The following options apply only to the `download` command. This command downloa
|
|||
- This skips all submissions from the specified subreddit
|
||||
- Can be specified multiple times
|
||||
- Also accepts CSV subreddit names
|
||||
- `--min-score`
|
||||
- This skips all submissions which have fewer than specified upvotes
|
||||
- `--max-score`
|
||||
- This skips all submissions which have more than specified upvotes
|
||||
- `--min-score-ratio`
|
||||
- This skips all submissions which have lower than specified upvote ratio
|
||||
- `--max-score-ratio`
|
||||
- This skips all submissions which have higher than specified upvote ratio
|
||||
|
||||
|
||||
### Archiver Options
|
||||
|
||||
|
|
|
@ -50,6 +50,10 @@ _downloader_options = [
|
|||
click.option('--skip', default=None, multiple=True),
|
||||
click.option('--skip-domain', default=None, multiple=True),
|
||||
click.option('--skip-subreddit', default=None, multiple=True),
|
||||
click.option('--min-score', type=int, default=None),
|
||||
click.option('--max-score', type=int, default=None),
|
||||
click.option('--min-score-ratio', type=float, default=None),
|
||||
click.option('--max-score-ratio', type=float, default=None),
|
||||
]
|
||||
|
||||
_archiver_options = [
|
||||
|
|
|
@ -38,6 +38,10 @@ class Configuration(Namespace):
|
|||
self.skip: list[str] = []
|
||||
self.skip_domain: list[str] = []
|
||||
self.skip_subreddit: list[str] = []
|
||||
self.min_score = None
|
||||
self.max_score = None
|
||||
self.min_score_ratio = None
|
||||
self.max_score_ratio = None
|
||||
self.sort: str = 'hot'
|
||||
self.submitted: bool = False
|
||||
self.subscribed: bool = False
|
||||
|
|
|
@ -57,6 +57,19 @@ class RedditDownloader(RedditConnector):
|
|||
f'Submission {submission.id} in {submission.subreddit.display_name} skipped'
|
||||
f' due to {submission.author.name if submission.author else "DELETED"} being an ignored user')
|
||||
return
|
||||
elif self.args.min_score and submission.score < self.args.min_score:
|
||||
logger.debug(
|
||||
f"Submission {submission.id} filtered due to score {submission.score} < [{self.args.min_score}]")
|
||||
return
|
||||
elif self.args.max_score and self.args.max_score < submission.score:
|
||||
logger.debug(
|
||||
f"Submission {submission.id} filtered due to score {submission.score} > [{self.args.max_score}]")
|
||||
return
|
||||
elif (self.args.min_score_ratio and submission.upvote_ratio < self.args.min_score_ratio) or (
|
||||
self.args.max_score_ratio and self.args.max_score_ratio < submission.upvote_ratio
|
||||
):
|
||||
logger.debug(f"Submission {submission.id} filtered due to score ratio ({submission.upvote_ratio})")
|
||||
return
|
||||
elif not isinstance(submission, praw.models.Submission):
|
||||
logger.warning(f'{submission.id} is not a submission')
|
||||
return
|
||||
|
|
|
@ -13,4 +13,5 @@ fi
|
|||
grep 'Download filter' "$file" | awk '{ print $(NF-3) }' ;
|
||||
grep 'already exists, continuing' "$file" | awk '{ print $(NF-3) }' ;
|
||||
grep 'Hard link made' "$file" | awk '{ print $(NF) }' ;
|
||||
grep 'filtered due to score' "$file" | awk '{ print $9 }'
|
||||
}
|
||||
|
|
2
scripts/tests/example_logfiles/succeed_score_filter.txt
Normal file
2
scripts/tests/example_logfiles/succeed_score_filter.txt
Normal file
|
@ -0,0 +1,2 @@
|
|||
[2022-07-23 14:04:14,095 - bdfr.downloader - DEBUG] - Submission ljyy27 filtered due to score 15 < [50]
|
||||
[2022-07-23 14:04:14,104 - bdfr.downloader - DEBUG] - Submission ljyy27 filtered due to score 16 > [1]
|
|
@ -13,31 +13,36 @@ teardown() {
|
|||
}
|
||||
|
||||
@test "fail no downloader module" {
|
||||
run ../extract_failed_ids.sh ./example_logfiles/failed_no_downloader.txt >> failed.txt
|
||||
run ../extract_failed_ids.sh ./example_logfiles/failed_no_downloader.txt
|
||||
echo "$output" > failed.txt
|
||||
assert [ "$( wc -l 'failed.txt' | awk '{ print $1 }' )" -eq "3" ];
|
||||
assert [ "$( grep -Ecv '\w{6,7}' 'failed.txt' )" -eq "0" ];
|
||||
}
|
||||
|
||||
@test "fail resource error" {
|
||||
run ../extract_failed_ids.sh ./example_logfiles/failed_resource_error.txt >> failed.txt
|
||||
run ../extract_failed_ids.sh ./example_logfiles/failed_resource_error.txt
|
||||
echo "$output" > failed.txt
|
||||
assert [ "$( wc -l 'failed.txt' | awk '{ print $1 }' )" -eq "1" ];
|
||||
assert [ "$( grep -Ecv '\w{6,7}' 'failed.txt' )" -eq "0" ];
|
||||
}
|
||||
|
||||
@test "fail site downloader error" {
|
||||
run ../extract_failed_ids.sh ./example_logfiles/failed_sitedownloader_error.txt >> failed.txt
|
||||
run ../extract_failed_ids.sh ./example_logfiles/failed_sitedownloader_error.txt
|
||||
echo "$output" > failed.txt
|
||||
assert [ "$( wc -l 'failed.txt' | awk '{ print $1 }' )" -eq "2" ];
|
||||
assert [ "$( grep -Ecv '\w{6,7}' 'failed.txt' )" -eq "0" ];
|
||||
}
|
||||
|
||||
@test "fail failed file write" {
|
||||
run ../extract_failed_ids.sh ./example_logfiles/failed_write_error.txt >> failed.txt
|
||||
run ../extract_failed_ids.sh ./example_logfiles/failed_write_error.txt
|
||||
echo "$output" > failed.txt
|
||||
assert [ "$( wc -l 'failed.txt' | awk '{ print $1 }' )" -eq "1" ];
|
||||
assert [ "$( grep -Ecv '\w{6,7}' 'failed.txt' )" -eq "0" ];
|
||||
}
|
||||
|
||||
@test "fail disabled module" {
|
||||
run ../extract_failed_ids.sh ./example_logfiles/failed_disabled_module.txt >> failed.txt
|
||||
run ../extract_failed_ids.sh ./example_logfiles/failed_disabled_module.txt
|
||||
echo "$output" > failed.txt
|
||||
assert [ "$( wc -l 'failed.txt' | awk '{ print $1 }' )" -eq "1" ];
|
||||
assert [ "$( grep -Ecv '\w{6,7}' 'failed.txt' )" -eq "0" ];
|
||||
}
|
||||
|
|
|
@ -8,31 +8,43 @@ teardown() {
|
|||
}
|
||||
|
||||
@test "success downloaded submission" {
|
||||
run ../extract_successful_ids.sh ./example_logfiles/succeed_downloaded_submission.txt >> ./successful.txt
|
||||
run ../extract_successful_ids.sh ./example_logfiles/succeed_downloaded_submission.txt
|
||||
echo "$output" > successful.txt
|
||||
assert [ "$( wc -l 'successful.txt' | awk '{ print $1 }' )" -eq "7" ];
|
||||
assert [ "$( grep -Ecv '\w{6,7}' 'successful.txt' )" -eq "0" ];
|
||||
}
|
||||
|
||||
@test "success resource hash" {
|
||||
run ../extract_successful_ids.sh ./example_logfiles/succeed_resource_hash.txt >> ./successful.txt
|
||||
run ../extract_successful_ids.sh ./example_logfiles/succeed_resource_hash.txt
|
||||
echo "$output" > successful.txt
|
||||
assert [ "$( wc -l 'successful.txt' | awk '{ print $1 }' )" -eq "1" ];
|
||||
assert [ "$( grep -Ecv '\w{6,7}' 'successful.txt' )" -eq "0" ];
|
||||
}
|
||||
|
||||
@test "success download filter" {
|
||||
run ../extract_successful_ids.sh ./example_logfiles/succeed_download_filter.txt >> ./successful.txt
|
||||
run ../extract_successful_ids.sh ./example_logfiles/succeed_download_filter.txt
|
||||
echo "$output" > successful.txt
|
||||
assert [ "$( wc -l 'successful.txt' | awk '{ print $1 }' )" -eq "3" ];
|
||||
assert [ "$( grep -Ecv '\w{6,7}' 'successful.txt' )" -eq "0" ];
|
||||
}
|
||||
|
||||
@test "success already exists" {
|
||||
run ../extract_successful_ids.sh ./example_logfiles/succeed_already_exists.txt >> ./successful.txt
|
||||
run ../extract_successful_ids.sh ./example_logfiles/succeed_already_exists.txt
|
||||
echo "$output" > successful.txt
|
||||
assert [ "$( wc -l 'successful.txt' | awk '{ print $1 }' )" -eq "3" ];
|
||||
assert [ "$( grep -Ecv '\w{6,7}' 'successful.txt' )" -eq "0" ];
|
||||
}
|
||||
|
||||
@test "success hard link" {
|
||||
run ../extract_successful_ids.sh ./example_logfiles/succeed_hard_link.txt >> ./successful.txt
|
||||
run ../extract_successful_ids.sh ./example_logfiles/succeed_hard_link.txt
|
||||
echo "$output" > successful.txt
|
||||
assert [ "$( wc -l 'successful.txt' | awk '{ print $1 }' )" -eq "1" ];
|
||||
assert [ "$( grep -Ecv '\w{6,7}' 'successful.txt' )" -eq "0" ];
|
||||
}
|
||||
|
||||
@test "success score filter" {
|
||||
run ../extract_successful_ids.sh ./example_logfiles/succeed_score_filter.txt
|
||||
echo "$output" > successful.txt
|
||||
assert [ "$( wc -l 'successful.txt' | awk '{ print $1 }' )" -eq "2" ];
|
||||
assert [ "$( grep -Ecv '\w{6,7}' 'successful.txt' )" -eq "0" ];
|
||||
}
|
||||
|
|
|
@ -351,3 +351,20 @@ def test_cli_download_ignore_user(test_args: list[str], tmp_path: Path):
|
|||
assert result.exit_code == 0
|
||||
assert 'Downloaded submission' not in result.output
|
||||
assert 'being an ignored user' in result.output
|
||||
|
||||
|
||||
@pytest.mark.online
|
||||
@pytest.mark.reddit
|
||||
@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
|
||||
@pytest.mark.parametrize(('test_args', 'was_filtered'), (
|
||||
(['-l', 'ljyy27', '--min-score', '50'], True),
|
||||
(['-l', 'ljyy27', '--min-score', '1'], False),
|
||||
(['-l', 'ljyy27', '--max-score', '1'], True),
|
||||
(['-l', 'ljyy27', '--max-score', '100'], False),
|
||||
))
|
||||
def test_cli_download_score_filter(test_args: list[str], was_filtered: bool, tmp_path: Path):
|
||||
runner = CliRunner()
|
||||
test_args = create_basic_args_for_download_runner(test_args, tmp_path)
|
||||
result = runner.invoke(cli, test_args)
|
||||
assert result.exit_code == 0
|
||||
assert ('filtered due to score' in result.output) == was_filtered
|
||||
|
|
|
@ -200,3 +200,107 @@ def test_download_submission(
|
|||
RedditDownloader._download_submission(downloader_mock, submission)
|
||||
folder_contents = list(tmp_path.iterdir())
|
||||
assert len(folder_contents) == expected_files_len
|
||||
|
||||
|
||||
@pytest.mark.online
|
||||
@pytest.mark.reddit
|
||||
@pytest.mark.parametrize(('test_submission_id', 'min_score'), (
|
||||
('ljyy27', 1),
|
||||
))
|
||||
def test_download_submission_min_score_above(
|
||||
test_submission_id: str,
|
||||
min_score: int,
|
||||
downloader_mock: MagicMock,
|
||||
reddit_instance: praw.Reddit,
|
||||
tmp_path: Path,
|
||||
capsys: pytest.CaptureFixture,
|
||||
):
|
||||
setup_logging(3)
|
||||
downloader_mock.reddit_instance = reddit_instance
|
||||
downloader_mock.download_filter.check_url.return_value = True
|
||||
downloader_mock.args.folder_scheme = ''
|
||||
downloader_mock.args.min_score = min_score
|
||||
downloader_mock.file_name_formatter = RedditConnector.create_file_name_formatter(downloader_mock)
|
||||
downloader_mock.download_directory = tmp_path
|
||||
submission = downloader_mock.reddit_instance.submission(id=test_submission_id)
|
||||
RedditDownloader._download_submission(downloader_mock, submission)
|
||||
output = capsys.readouterr()
|
||||
assert 'filtered due to score' not in output.out
|
||||
|
||||
|
||||
@pytest.mark.online
|
||||
@pytest.mark.reddit
|
||||
@pytest.mark.parametrize(('test_submission_id', 'min_score'), (
|
||||
('ljyy27', 25),
|
||||
))
|
||||
def test_download_submission_min_score_below(
|
||||
test_submission_id: str,
|
||||
min_score: int,
|
||||
downloader_mock: MagicMock,
|
||||
reddit_instance: praw.Reddit,
|
||||
tmp_path: Path,
|
||||
capsys: pytest.CaptureFixture,
|
||||
):
|
||||
setup_logging(3)
|
||||
downloader_mock.reddit_instance = reddit_instance
|
||||
downloader_mock.download_filter.check_url.return_value = True
|
||||
downloader_mock.args.folder_scheme = ''
|
||||
downloader_mock.args.min_score = min_score
|
||||
downloader_mock.file_name_formatter = RedditConnector.create_file_name_formatter(downloader_mock)
|
||||
downloader_mock.download_directory = tmp_path
|
||||
submission = downloader_mock.reddit_instance.submission(id=test_submission_id)
|
||||
RedditDownloader._download_submission(downloader_mock, submission)
|
||||
output = capsys.readouterr()
|
||||
assert 'filtered due to score' in output.out
|
||||
|
||||
|
||||
@pytest.mark.online
|
||||
@pytest.mark.reddit
|
||||
@pytest.mark.parametrize(('test_submission_id', 'max_score'), (
|
||||
('ljyy27', 25),
|
||||
))
|
||||
def test_download_submission_max_score_below(
|
||||
test_submission_id: str,
|
||||
max_score: int,
|
||||
downloader_mock: MagicMock,
|
||||
reddit_instance: praw.Reddit,
|
||||
tmp_path: Path,
|
||||
capsys: pytest.CaptureFixture,
|
||||
):
|
||||
setup_logging(3)
|
||||
downloader_mock.reddit_instance = reddit_instance
|
||||
downloader_mock.download_filter.check_url.return_value = True
|
||||
downloader_mock.args.folder_scheme = ''
|
||||
downloader_mock.args.max_score = max_score
|
||||
downloader_mock.file_name_formatter = RedditConnector.create_file_name_formatter(downloader_mock)
|
||||
downloader_mock.download_directory = tmp_path
|
||||
submission = downloader_mock.reddit_instance.submission(id=test_submission_id)
|
||||
RedditDownloader._download_submission(downloader_mock, submission)
|
||||
output = capsys.readouterr()
|
||||
assert 'filtered due to score' not in output.out
|
||||
|
||||
|
||||
@pytest.mark.online
|
||||
@pytest.mark.reddit
|
||||
@pytest.mark.parametrize(('test_submission_id', 'max_score'), (
|
||||
('ljyy27', 1),
|
||||
))
|
||||
def test_download_submission_max_score_above(
|
||||
test_submission_id: str,
|
||||
max_score: int,
|
||||
downloader_mock: MagicMock,
|
||||
reddit_instance: praw.Reddit,
|
||||
tmp_path: Path,
|
||||
capsys: pytest.CaptureFixture,
|
||||
):
|
||||
setup_logging(3)
|
||||
downloader_mock.reddit_instance = reddit_instance
|
||||
downloader_mock.download_filter.check_url.return_value = True
|
||||
downloader_mock.args.folder_scheme = ''
|
||||
downloader_mock.args.max_score = max_score
|
||||
downloader_mock.file_name_formatter = RedditConnector.create_file_name_formatter(downloader_mock)
|
||||
downloader_mock.download_directory = tmp_path
|
||||
submission = downloader_mock.reddit_instance.submission(id=test_submission_id)
|
||||
RedditDownloader._download_submission(downloader_mock, submission)
|
||||
output = capsys.readouterr()
|
||||
assert 'filtered due to score' in output.out
|
||||
|
|
Loading…
Reference in a new issue