Merge pull request #713 from OMEGARAZER/development
This commit is contained in:
commit
58e1d1a8f9
|
@ -4,6 +4,7 @@
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
|
from time import sleep
|
||||||
from typing import Iterator, Union
|
from typing import Iterator, Union
|
||||||
|
|
||||||
import dict2xml
|
import dict2xml
|
||||||
|
@ -28,23 +29,28 @@ class Archiver(RedditConnector):
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
for generator in self.reddit_lists:
|
for generator in self.reddit_lists:
|
||||||
for submission in generator:
|
try:
|
||||||
try:
|
for submission in generator:
|
||||||
if (submission.author and submission.author.name in self.args.ignore_user) or (
|
try:
|
||||||
submission.author is None and "DELETED" in self.args.ignore_user
|
if (submission.author and submission.author.name in self.args.ignore_user) or (
|
||||||
):
|
submission.author is None and "DELETED" in self.args.ignore_user
|
||||||
logger.debug(
|
):
|
||||||
f"Submission {submission.id} in {submission.subreddit.display_name} skipped"
|
logger.debug(
|
||||||
f" due to {submission.author.name if submission.author else 'DELETED'} being an ignored user"
|
f"Submission {submission.id} in {submission.subreddit.display_name} skipped due to"
|
||||||
)
|
f" {submission.author.name if submission.author else 'DELETED'} being an ignored user"
|
||||||
continue
|
)
|
||||||
if submission.id in self.excluded_submission_ids:
|
continue
|
||||||
logger.debug(f"Object {submission.id} in exclusion list, skipping")
|
if submission.id in self.excluded_submission_ids:
|
||||||
continue
|
logger.debug(f"Object {submission.id} in exclusion list, skipping")
|
||||||
logger.debug(f"Attempting to archive submission {submission.id}")
|
continue
|
||||||
self.write_entry(submission)
|
logger.debug(f"Attempting to archive submission {submission.id}")
|
||||||
except prawcore.PrawcoreException as e:
|
self.write_entry(submission)
|
||||||
logger.error(f"Submission {submission.id} failed to be archived due to a PRAW exception: {e}")
|
except prawcore.PrawcoreException as e:
|
||||||
|
logger.error(f"Submission {submission.id} failed to be archived due to a PRAW exception: {e}")
|
||||||
|
except prawcore.PrawcoreException as e:
|
||||||
|
logger.error(f"The submission after {submission.id} failed to download due to a PRAW exception: {e}")
|
||||||
|
logger.debug("Waiting 60 seconds to continue")
|
||||||
|
sleep(60)
|
||||||
|
|
||||||
def get_submissions_from_link(self) -> list[list[praw.models.Submission]]:
|
def get_submissions_from_link(self) -> list[list[praw.models.Submission]]:
|
||||||
supplied_submissions = []
|
supplied_submissions = []
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
# coding=utf-8
|
# coding=utf-8
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
import prawcore
|
import prawcore
|
||||||
|
|
||||||
|
@ -18,9 +19,14 @@ class RedditCloner(RedditDownloader, Archiver):
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
for generator in self.reddit_lists:
|
for generator in self.reddit_lists:
|
||||||
for submission in generator:
|
try:
|
||||||
try:
|
for submission in generator:
|
||||||
self._download_submission(submission)
|
try:
|
||||||
self.write_entry(submission)
|
self._download_submission(submission)
|
||||||
except prawcore.PrawcoreException as e:
|
self.write_entry(submission)
|
||||||
logger.error(f"Submission {submission.id} failed to be cloned due to a PRAW exception: {e}")
|
except prawcore.PrawcoreException as e:
|
||||||
|
logger.error(f"Submission {submission.id} failed to be cloned due to a PRAW exception: {e}")
|
||||||
|
except prawcore.PrawcoreException as e:
|
||||||
|
logger.error(f"The submission after {submission.id} failed to download due to a PRAW exception: {e}")
|
||||||
|
logger.debug("Waiting 60 seconds to continue")
|
||||||
|
sleep(60)
|
||||||
|
|
|
@ -13,6 +13,7 @@ from abc import ABCMeta, abstractmethod
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from enum import Enum, auto
|
from enum import Enum, auto
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from time import sleep
|
||||||
from typing import Callable, Iterator
|
from typing import Callable, Iterator
|
||||||
|
|
||||||
import appdirs
|
import appdirs
|
||||||
|
@ -353,26 +354,31 @@ class RedditConnector(metaclass=ABCMeta):
|
||||||
generators = []
|
generators = []
|
||||||
for user in self.args.user:
|
for user in self.args.user:
|
||||||
try:
|
try:
|
||||||
self.check_user_existence(user)
|
try:
|
||||||
except errors.BulkDownloaderException as e:
|
self.check_user_existence(user)
|
||||||
logger.error(e)
|
except errors.BulkDownloaderException as e:
|
||||||
continue
|
logger.error(e)
|
||||||
if self.args.submitted:
|
continue
|
||||||
logger.debug(f"Retrieving submitted posts of user {self.args.user}")
|
if self.args.submitted:
|
||||||
generators.append(
|
logger.debug(f"Retrieving submitted posts of user {user}")
|
||||||
self.create_filtered_listing_generator(
|
generators.append(
|
||||||
self.reddit_instance.redditor(user).submissions,
|
self.create_filtered_listing_generator(
|
||||||
|
self.reddit_instance.redditor(user).submissions,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
if not self.authenticated and any((self.args.upvoted, self.args.saved)):
|
||||||
if not self.authenticated and any((self.args.upvoted, self.args.saved)):
|
logger.warning("Accessing user lists requires authentication")
|
||||||
logger.warning("Accessing user lists requires authentication")
|
else:
|
||||||
else:
|
if self.args.upvoted:
|
||||||
if self.args.upvoted:
|
logger.debug(f"Retrieving upvoted posts of user {user}")
|
||||||
logger.debug(f"Retrieving upvoted posts of user {self.args.user}")
|
generators.append(self.reddit_instance.redditor(user).upvoted(limit=self.args.limit))
|
||||||
generators.append(self.reddit_instance.redditor(user).upvoted(limit=self.args.limit))
|
if self.args.saved:
|
||||||
if self.args.saved:
|
logger.debug(f"Retrieving saved posts of user {user}")
|
||||||
logger.debug(f"Retrieving saved posts of user {self.args.user}")
|
generators.append(self.reddit_instance.redditor(user).saved(limit=self.args.limit))
|
||||||
generators.append(self.reddit_instance.redditor(user).saved(limit=self.args.limit))
|
except prawcore.PrawcoreException as e:
|
||||||
|
logger.error(f"User {user} failed to be retrieved due to a PRAW exception: {e}")
|
||||||
|
logger.debug("Waiting 60 seconds to continue")
|
||||||
|
sleep(60)
|
||||||
return generators
|
return generators
|
||||||
else:
|
else:
|
||||||
return []
|
return []
|
||||||
|
|
|
@ -8,6 +8,7 @@ import time
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from multiprocessing import Pool
|
from multiprocessing import Pool
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
import praw
|
import praw
|
||||||
import praw.exceptions
|
import praw.exceptions
|
||||||
|
@ -42,11 +43,16 @@ class RedditDownloader(RedditConnector):
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
for generator in self.reddit_lists:
|
for generator in self.reddit_lists:
|
||||||
for submission in generator:
|
try:
|
||||||
try:
|
for submission in generator:
|
||||||
self._download_submission(submission)
|
try:
|
||||||
except prawcore.PrawcoreException as e:
|
self._download_submission(submission)
|
||||||
logger.error(f"Submission {submission.id} failed to download due to a PRAW exception: {e}")
|
except prawcore.PrawcoreException as e:
|
||||||
|
logger.error(f"Submission {submission.id} failed to download due to a PRAW exception: {e}")
|
||||||
|
except prawcore.PrawcoreException as e:
|
||||||
|
logger.error(f"The submission after {submission.id} failed to download due to a PRAW exception: {e}")
|
||||||
|
logger.debug("Waiting 60 seconds to continue")
|
||||||
|
sleep(60)
|
||||||
|
|
||||||
def _download_submission(self, submission: praw.models.Submission):
|
def _download_submission(self, submission: praw.models.Submission):
|
||||||
if submission.id in self.excluded_submission_ids:
|
if submission.id in self.excluded_submission_ids:
|
||||||
|
|
|
@ -4,7 +4,9 @@
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import prawcore
|
||||||
import pytest
|
import pytest
|
||||||
from click.testing import CliRunner
|
from click.testing import CliRunner
|
||||||
|
|
||||||
|
@ -176,3 +178,30 @@ def test_cli_archive_soft_fail(test_args: list[str], tmp_path: Path):
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
assert "failed to be archived due to a PRAW exception" in result.output
|
assert "failed to be archived due to a PRAW exception" in result.output
|
||||||
assert "Attempting to archive" not in result.output
|
assert "Attempting to archive" not in result.output
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests")
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("test_args", "response"),
|
||||||
|
(
|
||||||
|
(
|
||||||
|
["--user", "nasa", "--submitted"],
|
||||||
|
502,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
["--user", "nasa", "--submitted"],
|
||||||
|
504,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
def test_user_serv_fail(test_args: list[str], response: int, tmp_path: Path):
|
||||||
|
runner = CliRunner()
|
||||||
|
test_args = create_basic_args_for_archive_runner(test_args, tmp_path)
|
||||||
|
with patch("bdfr.connector.sleep", return_value=None):
|
||||||
|
with patch(
|
||||||
|
"bdfr.connector.RedditConnector.check_user_existence",
|
||||||
|
side_effect=prawcore.exceptions.ResponseException(MagicMock(status_code=response)),
|
||||||
|
):
|
||||||
|
result = runner.invoke(cli, test_args)
|
||||||
|
assert result.exit_code == 0
|
||||||
|
assert f"received {response} HTTP response" in result.output
|
||||||
|
|
|
@ -3,7 +3,9 @@
|
||||||
|
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import prawcore
|
||||||
import pytest
|
import pytest
|
||||||
from click.testing import CliRunner
|
from click.testing import CliRunner
|
||||||
|
|
||||||
|
@ -68,3 +70,30 @@ def test_cli_scrape_soft_fail(test_args: list[str], tmp_path: Path):
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
assert "Downloaded submission" not in result.output
|
assert "Downloaded submission" not in result.output
|
||||||
assert "Record for entry item" not in result.output
|
assert "Record for entry item" not in result.output
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests")
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("test_args", "response"),
|
||||||
|
(
|
||||||
|
(
|
||||||
|
["--user", "nasa", "--submitted"],
|
||||||
|
502,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
["--user", "nasa", "--submitted"],
|
||||||
|
504,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
def test_user_serv_fail(test_args: list[str], response: int, tmp_path: Path):
|
||||||
|
runner = CliRunner()
|
||||||
|
test_args = create_basic_args_for_cloner_runner(test_args, tmp_path)
|
||||||
|
with patch("bdfr.connector.sleep", return_value=None):
|
||||||
|
with patch(
|
||||||
|
"bdfr.connector.RedditConnector.check_user_existence",
|
||||||
|
side_effect=prawcore.exceptions.ResponseException(MagicMock(status_code=response)),
|
||||||
|
):
|
||||||
|
result = runner.invoke(cli, test_args)
|
||||||
|
assert result.exit_code == 0
|
||||||
|
assert f"received {response} HTTP response" in result.output
|
||||||
|
|
|
@ -3,7 +3,9 @@
|
||||||
|
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import prawcore
|
||||||
import pytest
|
import pytest
|
||||||
from click.testing import CliRunner
|
from click.testing import CliRunner
|
||||||
|
|
||||||
|
@ -396,3 +398,30 @@ def test_cli_download_score_filter(test_args: list[str], was_filtered: bool, tmp
|
||||||
result = runner.invoke(cli, test_args)
|
result = runner.invoke(cli, test_args)
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
assert ("filtered due to score" in result.output) == was_filtered
|
assert ("filtered due to score" in result.output) == was_filtered
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests")
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("test_args", "response"),
|
||||||
|
(
|
||||||
|
(
|
||||||
|
["--user", "nasa", "--submitted"],
|
||||||
|
502,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
["--user", "nasa", "--submitted"],
|
||||||
|
504,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
def test_user_serv_fail(test_args: list[str], response: int, tmp_path: Path):
|
||||||
|
runner = CliRunner()
|
||||||
|
test_args = create_basic_args_for_download_runner(test_args, tmp_path)
|
||||||
|
with patch("bdfr.connector.sleep", return_value=None):
|
||||||
|
with patch(
|
||||||
|
"bdfr.connector.RedditConnector.check_user_existence",
|
||||||
|
side_effect=prawcore.exceptions.ResponseException(MagicMock(status_code=response)),
|
||||||
|
):
|
||||||
|
result = runner.invoke(cli, test_args)
|
||||||
|
assert result.exit_code == 0
|
||||||
|
assert f"received {response} HTTP response" in result.output
|
||||||
|
|
Loading…
Reference in a new issue