Add much more logging
This commit is contained in:
parent
312769cb66
commit
f941161014
6 changed files with 47 additions and 21 deletions
|
@ -57,10 +57,12 @@ def _setup_logging(verbosity: int):
|
|||
formatter = logging.Formatter('[%(asctime)s - %(name)s - %(levelname)s] - %(message)s')
|
||||
stream.setFormatter(formatter)
|
||||
logger.addHandler(stream)
|
||||
if verbosity < 0:
|
||||
if verbosity <= 0:
|
||||
stream.setLevel(logging.INFO)
|
||||
else:
|
||||
elif verbosity == 1:
|
||||
stream.setLevel(logging.DEBUG)
|
||||
else:
|
||||
stream.setLevel(9)
|
||||
logging.getLogger('praw').setLevel(logging.CRITICAL)
|
||||
logging.getLogger('prawcore').setLevel(logging.CRITICAL)
|
||||
logging.getLogger('urllib3').setLevel(logging.CRITICAL)
|
||||
|
|
|
@ -1,8 +1,11 @@
|
|||
#!/usr/bin/env python3
|
||||
# coding=utf-8
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DownloadFilter:
|
||||
def __init__(self, excluded_extensions: list[str] = None, excluded_domains: list[str] = None):
|
||||
|
@ -24,6 +27,7 @@ class DownloadFilter:
|
|||
combined_extensions = '|'.join(self.excluded_extensions)
|
||||
pattern = re.compile(r'.*({})$'.format(combined_extensions))
|
||||
if re.match(pattern, url):
|
||||
logger.log(9, f'Url "{url}" matched with "{str(pattern)}"')
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
@ -34,6 +38,7 @@ class DownloadFilter:
|
|||
combined_domains = '|'.join(self.excluded_domains)
|
||||
pattern = re.compile(r'https?://.*({}).*'.format(combined_domains))
|
||||
if re.match(pattern, url):
|
||||
logger.log(9, f'Url "{url}" matched with "{str(pattern)}"')
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
|
|
@ -57,13 +57,13 @@ class RedditDownloader:
|
|||
self._create_file_logger()
|
||||
|
||||
self.download_filter = self._create_download_filter()
|
||||
logger.debug('Created download filter')
|
||||
logger.log(9, 'Created download filter')
|
||||
self.time_filter = self._create_time_filter()
|
||||
logger.debug('Created time filter')
|
||||
logger.log(9, 'Created time filter')
|
||||
self.sort_filter = self._create_sort_filter()
|
||||
logger.debug('Created sort filter')
|
||||
logger.log(9, 'Created sort filter')
|
||||
self.file_name_formatter = self._create_file_name_formatter()
|
||||
logger.debug('Create file name formatter')
|
||||
logger.log(9, 'Create file name formatter')
|
||||
|
||||
self._resolve_user_name()
|
||||
self._load_config()
|
||||
|
@ -71,14 +71,14 @@ class RedditDownloader:
|
|||
|
||||
self.master_hash_list = []
|
||||
self.authenticator = self._create_authenticator()
|
||||
logger.debug('Created site authenticator')
|
||||
logger.log(9, 'Created site authenticator')
|
||||
self._create_reddit_instance()
|
||||
|
||||
def _create_reddit_instance(self):
|
||||
if self.args.authenticate:
|
||||
logger.debug('Using authenticated Reddit instance')
|
||||
if not self.cfg_parser.has_option('DEFAULT', 'user_token'):
|
||||
logger.debug('Commencing OAuth2 authentication')
|
||||
logger.log(9, 'Commencing OAuth2 authentication')
|
||||
scopes = self.cfg_parser.get('DEFAULT', 'scopes')
|
||||
scopes = OAuth2Authenticator.split_scopes(scopes)
|
||||
oauth2_authenticator = OAuth2Authenticator(
|
||||
|
@ -106,13 +106,13 @@ class RedditDownloader:
|
|||
def _retrieve_reddit_lists(self) -> list[praw.models.ListingGenerator]:
    """Collect the results of every configured retrieval step into one list.

    The steps run in a fixed order: subreddits, multireddits, user data, then
    directly supplied submission links.  Whatever each step returns is appended
    to the result with ``list.extend``, and a progress message is logged once,
    at numeric level 9, after each step finishes.

    Returns:
        The combined list, in the order the steps ran.
    """
    # (retrieval method, message logged once that method has returned)
    retrieval_steps = (
        (self._get_subreddits, 'Retrieved subreddits'),
        (self._get_multireddits, 'Retrieved multireddits'),
        (self._get_user_data, 'Retrieved user data'),
        (self._get_submissions_from_link, 'Retrieved submissions for given links'),
    )
    master_list = []
    for retrieve, message in retrieval_steps:
        master_list.extend(retrieve())
        # Level 9 sits just below logging.DEBUG (10); one message per step.
        logger.log(9, message)
    return master_list
|
||||
|
||||
def _determine_directories(self):
|
||||
|
@ -140,6 +140,7 @@ class RedditDownloader:
|
|||
for path in possible_paths:
|
||||
if path.resolve().expanduser().exists():
|
||||
self.config_location = path
|
||||
logger.debug(f'Loading configuration from {path}')
|
||||
break
|
||||
if not self.config_location:
|
||||
raise errors.BulkDownloaderException('Could not find a configuration file to load')
|
||||
|
@ -181,6 +182,7 @@ class RedditDownloader:
|
|||
def _resolve_user_name(self):
    """Replace the placeholder user name ``'me'`` with a concrete name.

    When ``self.args.user`` is exactly ``'me'`` it is overwritten with the
    ``name`` of the object returned by ``self.reddit_instance.user.me()``, and
    the resolved value is logged at numeric level 9.  Any other value is left
    untouched.
    """
    if self.args.user != 'me':
        return
    resolved_name = self.reddit_instance.user.me().name
    self.args.user = resolved_name
    logger.log(9, f'Resolved user to {self.args.user}')
|
||||
|
||||
def _get_submissions_from_link(self) -> list[list[praw.models.Submission]]:
|
||||
supplied_submissions = []
|
||||
|
@ -227,6 +229,7 @@ class RedditDownloader:
|
|||
generators = []
|
||||
sort_function = self._determine_sort_function()
|
||||
if self.args.submitted:
|
||||
logger.debug(f'Retrieving submitted posts of user {self.args.user}')
|
||||
generators.append(
|
||||
sort_function(
|
||||
self.reddit_instance.redditor(self.args.user).submissions,
|
||||
|
@ -235,8 +238,10 @@ class RedditDownloader:
|
|||
raise errors.RedditAuthenticationError('Accessing user lists requires authentication')
|
||||
else:
|
||||
if self.args.upvoted:
|
||||
logger.debug(f'Retrieving upvoted posts of user {self.args.user}')
|
||||
generators.append(self.reddit_instance.redditor(self.args.user).upvoted(limit=self.args.limit))
|
||||
if self.args.saved:
|
||||
logger.debug(f'Retrieving saved posts of user {self.args.user}')
|
||||
generators.append(self.reddit_instance.redditor(self.args.user).saved(limit=self.args.limit))
|
||||
return generators
|
||||
else:
|
||||
|
@ -277,11 +282,11 @@ class RedditDownloader:
|
|||
def download(self):
    """Hand every submission from every entry of ``self.reddit_lists`` to the downloader.

    Entries of ``self.reddit_lists`` are consumed in order and each one is
    iterated lazily.  For every submission a DEBUG message carrying its id is
    logged before ``_download_submission`` is called.
    """
    # Flatten lazily so the entries are consumed exactly as a nested loop would.
    pending = (entry for listing in self.reddit_lists for entry in listing)
    for submission in pending:
        logger.debug(f'Attempting to download submission {submission.id}')
        self._download_submission(submission)
|
||||
|
||||
def _download_submission(self, submission: praw.models.Submission):
|
||||
if self.download_filter.check_url(submission.url):
|
||||
logger.debug(f'Attempting to download submission {submission.id}')
|
||||
|
||||
try:
|
||||
downloader_class = DownloadFactory.pull_lever(submission.url)
|
||||
|
@ -293,11 +298,12 @@ class RedditDownloader:
|
|||
content = downloader.find_resources(self.authenticator)
|
||||
for destination, res in self.file_name_formatter.format_resource_paths(content, self.download_directory):
|
||||
if destination.exists():
|
||||
logger.debug(f'File already exists: {destination}')
|
||||
logger.warning(f'File already exists: {destination}')
|
||||
else:
|
||||
res.download()
|
||||
if res.hash.hexdigest() in self.master_hash_list and self.args.no_dupes:
|
||||
logger.debug(f'Resource from {res.url} downloaded elsewhere')
|
||||
logger.warning(
|
||||
f'Resource from "{res.url}" and hash "{res.hash.hexdigest()}" downloaded elsewhere')
|
||||
else:
|
||||
# TODO: consider making a hard link/symlink here
|
||||
destination.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
# coding=utf-8
|
||||
|
||||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
@ -10,6 +11,8 @@ import praw.models
|
|||
from bulkredditdownloader.exceptions import BulkDownloaderException
|
||||
from bulkredditdownloader.resource import Resource
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FileNameFormatter:
|
||||
key_terms = ('title', 'subreddit', 'redditor', 'postid', 'upvotes', 'flair', 'date')
|
||||
|
@ -35,6 +38,7 @@ class FileNameFormatter:
|
|||
for key in submission_attributes.keys():
|
||||
if re.search(r'(?i).*{{{}}}.*'.format(key), result):
|
||||
result = re.sub(r'(?i){{{}}}'.format(key), str(submission_attributes.get(key, 'unknown')), result)
|
||||
logger.log(9, f'Found key string {key} in name')
|
||||
|
||||
result = result.replace('/', '')
|
||||
return result
|
||||
|
@ -42,14 +46,18 @@ class FileNameFormatter:
|
|||
def _format_path(self, resource: Resource, destination_directory: Path, index: Optional[int] = None) -> Path:
    """Build the destination path for a single resource.

    The sub-folder is ``destination_directory`` joined with the result of
    ``_format_name`` using ``directory_format_string``; the file name is the
    result of ``_format_name`` using ``file_format_string``, followed by an
    optional ``_<index>`` suffix and ``resource.extension``.

    Args:
        resource: The resource whose ``source_submission`` and ``extension``
            are used to build the name.
        destination_directory: Root directory under which the sub-folder goes.
        index: Position appended to the file stem as ``_<index>``.  Any falsy
            value (``None`` or ``0``) adds no suffix.

    Returns:
        The full path of the file inside the formatted sub-folder.

    Raises:
        BulkDownloaderException: If assembling the file name raises a
            ``TypeError`` (presumably when ``resource.extension`` is not a
            string - confirm against ``Resource``).
    """
    subfolder = destination_directory / self._format_name(resource.source_submission, self.directory_format_string)
    index = f'_{str(index)}' if index else ''
    try:
        # The name is assembled only inside the guard so that a TypeError is
        # always translated into the project's own exception type.
        file_name = str(self._format_name(resource.source_submission, self.file_format_string))
        file_path = subfolder / (file_name + index + resource.extension)
    except TypeError as error:
        # Chain the cause so the traceback still shows the offending operand.
        raise BulkDownloaderException(
            f'Could not determine path name: {subfolder}, {index}, {resource.extension}') from error
    return file_path
|
||||
|
||||
def format_resource_paths(self, resources: list[Resource],
                          destination_directory: Path) -> list[tuple[Path, Resource]]:
    """Pair every resource with the path it should be written to.

    Resources are numbered from 1 in the order given.  For each one a level-9
    message with its number is logged, then ``_format_path`` is called with the
    resource, ``destination_directory`` and that number.

    Returns:
        A list of ``(path, resource)`` tuples in the same order as ``resources``.
    """
    def pair_with_path(position, resource):
        # Log first, then format: the same per-item order as a plain loop.
        logger.log(9, f'Formatting filename with index {position}')
        return self._format_path(resource, destination_directory, position), resource

    return [pair_with_path(position, resource) for position, resource in enumerate(resources, start=1)]
|
||||
|
||||
|
|
|
@ -70,12 +70,12 @@ class OAuth2Authenticator:
|
|||
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||
server.bind(('localhost', 7634))
|
||||
logger.debug('Server listening on localhost:7634')
|
||||
logger.log(9, 'Server listening on localhost:7634')
|
||||
|
||||
server.listen(1)
|
||||
client = server.accept()[0]
|
||||
server.close()
|
||||
logger.debug('Server closed')
|
||||
logger.log(9, 'Server closed')
|
||||
|
||||
return client
|
||||
|
||||
|
@ -95,7 +95,7 @@ class OAuth2TokenManager(praw.reddit.BaseTokenManager):
|
|||
if authorizer.refresh_token is None:
|
||||
if self.config.has_option('DEFAULT', 'user_token'):
|
||||
authorizer.refresh_token = self.config.get('DEFAULT', 'user_token')
|
||||
logger.debug('Loaded OAuth2 token for authoriser')
|
||||
logger.log(9, 'Loaded OAuth2 token for authoriser')
|
||||
else:
|
||||
raise RedditAuthenticationError('No auth token loaded in configuration')
|
||||
|
||||
|
@ -103,4 +103,4 @@ class OAuth2TokenManager(praw.reddit.BaseTokenManager):
|
|||
self.config.set('DEFAULT', 'user_token', authorizer.refresh_token)
|
||||
with open(self.config_location, 'w') as file:
|
||||
self.config.write(file, True)
|
||||
logger.debug(f'Written OAuth2 token from authoriser to {self.config_location}')
|
||||
logger.log(9, f'Written OAuth2 token from authoriser to {self.config_location}')
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# coding=utf-8
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from typing import Optional
|
||||
|
@ -12,6 +13,8 @@ from praw.models import Submission
|
|||
|
||||
from bulkredditdownloader.exceptions import BulkDownloaderException
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Resource:
|
||||
def __init__(self, source_submission: Submission, url: str, extension: str = None):
|
||||
|
@ -32,10 +35,12 @@ class Resource:
|
|||
else:
|
||||
raise requests.exceptions.ConnectionError
|
||||
except requests.exceptions.ConnectionError:
|
||||
logger.log(9, f'Error occured downloading resource, waiting {wait_time} seconds')
|
||||
time.sleep(wait_time)
|
||||
if wait_time < 300:
|
||||
return Resource.retry_download(url, wait_time + 60)
|
||||
else:
|
||||
logger.error(f'Max wait time exceeded for resource at url {url}')
|
||||
return None
|
||||
|
||||
def download(self):
|
||||
|
|
Loading…
Reference in a new issue