1
0
Fork 0
mirror of synced 2024-06-02 18:34:37 +12:00

Merge pull request #713 from OMEGARAZER/development

This commit is contained in:
Serene 2022-12-16 10:59:35 +10:00 committed by GitHub
commit 58e1d1a8f9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 158 additions and 47 deletions

View file

@@ -4,6 +4,7 @@
import json import json
import logging import logging
import re import re
from time import sleep
from typing import Iterator, Union from typing import Iterator, Union
import dict2xml import dict2xml
@@ -28,23 +29,28 @@ class Archiver(RedditConnector):
def download(self): def download(self):
for generator in self.reddit_lists: for generator in self.reddit_lists:
for submission in generator: try:
try: for submission in generator:
if (submission.author and submission.author.name in self.args.ignore_user) or ( try:
submission.author is None and "DELETED" in self.args.ignore_user if (submission.author and submission.author.name in self.args.ignore_user) or (
): submission.author is None and "DELETED" in self.args.ignore_user
logger.debug( ):
f"Submission {submission.id} in {submission.subreddit.display_name} skipped" logger.debug(
f" due to {submission.author.name if submission.author else 'DELETED'} being an ignored user" f"Submission {submission.id} in {submission.subreddit.display_name} skipped due to"
) f" {submission.author.name if submission.author else 'DELETED'} being an ignored user"
continue )
if submission.id in self.excluded_submission_ids: continue
logger.debug(f"Object {submission.id} in exclusion list, skipping") if submission.id in self.excluded_submission_ids:
continue logger.debug(f"Object {submission.id} in exclusion list, skipping")
logger.debug(f"Attempting to archive submission {submission.id}") continue
self.write_entry(submission) logger.debug(f"Attempting to archive submission {submission.id}")
except prawcore.PrawcoreException as e: self.write_entry(submission)
logger.error(f"Submission {submission.id} failed to be archived due to a PRAW exception: {e}") except prawcore.PrawcoreException as e:
logger.error(f"Submission {submission.id} failed to be archived due to a PRAW exception: {e}")
except prawcore.PrawcoreException as e:
logger.error(f"The submission after {submission.id} failed to download due to a PRAW exception: {e}")
logger.debug("Waiting 60 seconds to continue")
sleep(60)
def get_submissions_from_link(self) -> list[list[praw.models.Submission]]: def get_submissions_from_link(self) -> list[list[praw.models.Submission]]:
supplied_submissions = [] supplied_submissions = []

View file

@@ -2,6 +2,7 @@
# coding=utf-8 # coding=utf-8
import logging import logging
from time import sleep
import prawcore import prawcore
@@ -18,9 +19,14 @@ class RedditCloner(RedditDownloader, Archiver):
def download(self): def download(self):
for generator in self.reddit_lists: for generator in self.reddit_lists:
for submission in generator: try:
try: for submission in generator:
self._download_submission(submission) try:
self.write_entry(submission) self._download_submission(submission)
except prawcore.PrawcoreException as e: self.write_entry(submission)
logger.error(f"Submission {submission.id} failed to be cloned due to a PRAW exception: {e}") except prawcore.PrawcoreException as e:
logger.error(f"Submission {submission.id} failed to be cloned due to a PRAW exception: {e}")
except prawcore.PrawcoreException as e:
logger.error(f"The submission after {submission.id} failed to download due to a PRAW exception: {e}")
logger.debug("Waiting 60 seconds to continue")
sleep(60)

View file

@@ -13,6 +13,7 @@ from abc import ABCMeta, abstractmethod
from datetime import datetime from datetime import datetime
from enum import Enum, auto from enum import Enum, auto
from pathlib import Path from pathlib import Path
from time import sleep
from typing import Callable, Iterator from typing import Callable, Iterator
import appdirs import appdirs
@@ -353,26 +354,31 @@ class RedditConnector(metaclass=ABCMeta):
generators = [] generators = []
for user in self.args.user: for user in self.args.user:
try: try:
self.check_user_existence(user) try:
except errors.BulkDownloaderException as e: self.check_user_existence(user)
logger.error(e) except errors.BulkDownloaderException as e:
continue logger.error(e)
if self.args.submitted: continue
logger.debug(f"Retrieving submitted posts of user {self.args.user}") if self.args.submitted:
generators.append( logger.debug(f"Retrieving submitted posts of user {user}")
self.create_filtered_listing_generator( generators.append(
self.reddit_instance.redditor(user).submissions, self.create_filtered_listing_generator(
self.reddit_instance.redditor(user).submissions,
)
) )
) if not self.authenticated and any((self.args.upvoted, self.args.saved)):
if not self.authenticated and any((self.args.upvoted, self.args.saved)): logger.warning("Accessing user lists requires authentication")
logger.warning("Accessing user lists requires authentication") else:
else: if self.args.upvoted:
if self.args.upvoted: logger.debug(f"Retrieving upvoted posts of user {user}")
logger.debug(f"Retrieving upvoted posts of user {self.args.user}") generators.append(self.reddit_instance.redditor(user).upvoted(limit=self.args.limit))
generators.append(self.reddit_instance.redditor(user).upvoted(limit=self.args.limit)) if self.args.saved:
if self.args.saved: logger.debug(f"Retrieving saved posts of user {user}")
logger.debug(f"Retrieving saved posts of user {self.args.user}") generators.append(self.reddit_instance.redditor(user).saved(limit=self.args.limit))
generators.append(self.reddit_instance.redditor(user).saved(limit=self.args.limit)) except prawcore.PrawcoreException as e:
logger.error(f"User {user} failed to be retrieved due to a PRAW exception: {e}")
logger.debug("Waiting 60 seconds to continue")
sleep(60)
return generators return generators
else: else:
return [] return []

View file

@@ -8,6 +8,7 @@ import time
from datetime import datetime from datetime import datetime
from multiprocessing import Pool from multiprocessing import Pool
from pathlib import Path from pathlib import Path
from time import sleep
import praw import praw
import praw.exceptions import praw.exceptions
@@ -42,11 +43,16 @@ class RedditDownloader(RedditConnector):
def download(self): def download(self):
for generator in self.reddit_lists: for generator in self.reddit_lists:
for submission in generator: try:
try: for submission in generator:
self._download_submission(submission) try:
except prawcore.PrawcoreException as e: self._download_submission(submission)
logger.error(f"Submission {submission.id} failed to download due to a PRAW exception: {e}") except prawcore.PrawcoreException as e:
logger.error(f"Submission {submission.id} failed to download due to a PRAW exception: {e}")
except prawcore.PrawcoreException as e:
logger.error(f"The submission after {submission.id} failed to download due to a PRAW exception: {e}")
logger.debug("Waiting 60 seconds to continue")
sleep(60)
def _download_submission(self, submission: praw.models.Submission): def _download_submission(self, submission: praw.models.Submission):
if submission.id in self.excluded_submission_ids: if submission.id in self.excluded_submission_ids:

View file

@@ -4,7 +4,9 @@
import re import re
import shutil import shutil
from pathlib import Path from pathlib import Path
from unittest.mock import MagicMock, patch
import prawcore
import pytest import pytest
from click.testing import CliRunner from click.testing import CliRunner
@@ -176,3 +178,30 @@ def test_cli_archive_soft_fail(test_args: list[str], tmp_path: Path):
assert result.exit_code == 0 assert result.exit_code == 0
assert "failed to be archived due to a PRAW exception" in result.output assert "failed to be archived due to a PRAW exception" in result.output
assert "Attempting to archive" not in result.output assert "Attempting to archive" not in result.output
@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests")
@pytest.mark.parametrize(
    ("test_args", "response"),
    (
        (["--user", "nasa", "--submitted"], 502),
        (["--user", "nasa", "--submitted"], 504),
    ),
)
def test_user_serv_fail(test_args: list[str], response: int, tmp_path: Path):
    """Verify that a 5xx server error while resolving a user is soft-failed.

    The archiver should log the HTTP response and still exit with code 0
    rather than crashing the whole run.
    """
    runner = CliRunner()
    test_args = create_basic_args_for_archive_runner(test_args, tmp_path)
    # Force the user-existence check to raise the server error under test,
    # and stub out sleep so the retry back-off does not slow the suite down.
    server_error = prawcore.exceptions.ResponseException(MagicMock(status_code=response))
    with patch("bdfr.connector.sleep", return_value=None), patch(
        "bdfr.connector.RedditConnector.check_user_existence",
        side_effect=server_error,
    ):
        result = runner.invoke(cli, test_args)
    assert result.exit_code == 0
    assert f"received {response} HTTP response" in result.output

View file

@@ -3,7 +3,9 @@
import shutil import shutil
from pathlib import Path from pathlib import Path
from unittest.mock import MagicMock, patch
import prawcore
import pytest import pytest
from click.testing import CliRunner from click.testing import CliRunner
@@ -68,3 +70,30 @@ def test_cli_scrape_soft_fail(test_args: list[str], tmp_path: Path):
assert result.exit_code == 0 assert result.exit_code == 0
assert "Downloaded submission" not in result.output assert "Downloaded submission" not in result.output
assert "Record for entry item" not in result.output assert "Record for entry item" not in result.output
@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests")
@pytest.mark.parametrize(
    ("test_args", "response"),
    (
        (["--user", "nasa", "--submitted"], 502),
        (["--user", "nasa", "--submitted"], 504),
    ),
)
def test_user_serv_fail(test_args: list[str], response: int, tmp_path: Path):
    """Verify that a 5xx server error while resolving a user is soft-failed.

    The cloner should log the HTTP response and still exit with code 0
    rather than crashing the whole run.
    """
    runner = CliRunner()
    test_args = create_basic_args_for_cloner_runner(test_args, tmp_path)
    # Force the user-existence check to raise the server error under test,
    # and stub out sleep so the retry back-off does not slow the suite down.
    server_error = prawcore.exceptions.ResponseException(MagicMock(status_code=response))
    with patch("bdfr.connector.sleep", return_value=None), patch(
        "bdfr.connector.RedditConnector.check_user_existence",
        side_effect=server_error,
    ):
        result = runner.invoke(cli, test_args)
    assert result.exit_code == 0
    assert f"received {response} HTTP response" in result.output

View file

@@ -3,7 +3,9 @@
import shutil import shutil
from pathlib import Path from pathlib import Path
from unittest.mock import MagicMock, patch
import prawcore
import pytest import pytest
from click.testing import CliRunner from click.testing import CliRunner
@@ -396,3 +398,30 @@ def test_cli_download_score_filter(test_args: list[str], was_filtered: bool, tmp
result = runner.invoke(cli, test_args) result = runner.invoke(cli, test_args)
assert result.exit_code == 0 assert result.exit_code == 0
assert ("filtered due to score" in result.output) == was_filtered assert ("filtered due to score" in result.output) == was_filtered
@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests")
@pytest.mark.parametrize(
    ("test_args", "response"),
    (
        (["--user", "nasa", "--submitted"], 502),
        (["--user", "nasa", "--submitted"], 504),
    ),
)
def test_user_serv_fail(test_args: list[str], response: int, tmp_path: Path):
    """Verify that a 5xx server error while resolving a user is soft-failed.

    The downloader should log the HTTP response and still exit with code 0
    rather than crashing the whole run.
    """
    runner = CliRunner()
    test_args = create_basic_args_for_download_runner(test_args, tmp_path)
    # Force the user-existence check to raise the server error under test,
    # and stub out sleep so the retry back-off does not slow the suite down.
    server_error = prawcore.exceptions.ResponseException(MagicMock(status_code=response))
    with patch("bdfr.connector.sleep", return_value=None), patch(
        "bdfr.connector.RedditConnector.check_user_existence",
        side_effect=server_error,
    ):
        result = runner.invoke(cli, test_args)
    assert result.exit_code == 0
    assert f"received {response} HTTP response" in result.output