diff --git a/bulkredditdownloader/__main__.py b/bulkredditdownloader/__main__.py index 3625c88..d8d1f08 100644 --- a/bulkredditdownloader/__main__.py +++ b/bulkredditdownloader/__main__.py @@ -57,10 +57,12 @@ def _setup_logging(verbosity: int): formatter = logging.Formatter('[%(asctime)s - %(name)s - %(levelname)s] - %(message)s') stream.setFormatter(formatter) logger.addHandler(stream) - if verbosity < 0: + if verbosity <= 0: stream.setLevel(logging.INFO) - else: + elif verbosity == 1: stream.setLevel(logging.DEBUG) + else: + stream.setLevel(9) logging.getLogger('praw').setLevel(logging.CRITICAL) logging.getLogger('prawcore').setLevel(logging.CRITICAL) logging.getLogger('urllib3').setLevel(logging.CRITICAL) diff --git a/bulkredditdownloader/download_filter.py b/bulkredditdownloader/download_filter.py index 806fd0d..37a6ce9 100644 --- a/bulkredditdownloader/download_filter.py +++ b/bulkredditdownloader/download_filter.py @@ -1,8 +1,11 @@ #!/usr/bin/env python3 # coding=utf-8 +import logging import re +logger = logging.getLogger(__name__) + class DownloadFilter: def __init__(self, excluded_extensions: list[str] = None, excluded_domains: list[str] = None): @@ -24,6 +27,7 @@ class DownloadFilter: combined_extensions = '|'.join(self.excluded_extensions) pattern = re.compile(r'.*({})$'.format(combined_extensions)) if re.match(pattern, url): + logger.log(9, f'Url "{url}" matched with "{str(pattern)}"') return False else: return True @@ -34,6 +38,7 @@ class DownloadFilter: combined_domains = '|'.join(self.excluded_domains) pattern = re.compile(r'https?://.*({}).*'.format(combined_domains)) if re.match(pattern, url): + logger.log(9, f'Url "{url}" matched with "{str(pattern)}"') return False else: return True diff --git a/bulkredditdownloader/downloader.py b/bulkredditdownloader/downloader.py index b60ed2b..5b939b0 100644 --- a/bulkredditdownloader/downloader.py +++ b/bulkredditdownloader/downloader.py @@ -57,13 +57,13 @@ class RedditDownloader: self._create_file_logger() 
self.download_filter = self._create_download_filter() - logger.debug('Created download filter') + logger.log(9, 'Created download filter') self.time_filter = self._create_time_filter() - logger.debug('Created time filter') + logger.log(9, 'Created time filter') self.sort_filter = self._create_sort_filter() - logger.debug('Created sort filter') + logger.log(9, 'Created sort filter') self.file_name_formatter = self._create_file_name_formatter() - logger.debug('Create file name formatter') + logger.log(9, 'Create file name formatter') self._resolve_user_name() self._load_config() @@ -71,14 +71,14 @@ class RedditDownloader: self.master_hash_list = [] self.authenticator = self._create_authenticator() - logger.debug('Created site authenticator') + logger.log(9, 'Created site authenticator') self._create_reddit_instance() def _create_reddit_instance(self): if self.args.authenticate: logger.debug('Using authenticated Reddit instance') if not self.cfg_parser.has_option('DEFAULT', 'user_token'): - logger.debug('Commencing OAuth2 authentication') + logger.log(9, 'Commencing OAuth2 authentication') scopes = self.cfg_parser.get('DEFAULT', 'scopes') scopes = OAuth2Authenticator.split_scopes(scopes) oauth2_authenticator = OAuth2Authenticator( @@ -106,13 +106,13 @@ class RedditDownloader: def _retrieve_reddit_lists(self) -> list[praw.models.ListingGenerator]: master_list = [] master_list.extend(self._get_subreddits()) - logger.debug('Retrieved subreddits') + logger.log(9, 'Retrieved subreddits') master_list.extend(self._get_multireddits()) - logger.debug('Retrieved multireddits') + logger.log(9, 'Retrieved multireddits') master_list.extend(self._get_user_data()) - logger.debug('Retrieved user data') + logger.log(9, 'Retrieved user data') master_list.extend(self._get_submissions_from_link()) - logger.debug('Retrieved submissions for given links') + logger.log(9, 'Retrieved submissions for given links') return master_list def _determine_directories(self): @@ -140,6 +140,7 @@ class 
RedditDownloader: for path in possible_paths: if path.resolve().expanduser().exists(): self.config_location = path + logger.debug(f'Loading configuration from {path}') break if not self.config_location: raise errors.BulkDownloaderException('Could not find a configuration file to load') @@ -181,6 +182,7 @@ class RedditDownloader: def _resolve_user_name(self): if self.args.user == 'me': self.args.user = self.reddit_instance.user.me().name + logger.log(9, f'Resolved user to {self.args.user}') def _get_submissions_from_link(self) -> list[list[praw.models.Submission]]: supplied_submissions = [] @@ -227,6 +229,7 @@ class RedditDownloader: generators = [] sort_function = self._determine_sort_function() if self.args.submitted: + logger.debug(f'Retrieving submitted posts of user {self.args.user}') generators.append( sort_function( self.reddit_instance.redditor(self.args.user).submissions, @@ -235,8 +238,10 @@ class RedditDownloader: raise errors.RedditAuthenticationError('Accessing user lists requires authentication') else: if self.args.upvoted: + logger.debug(f'Retrieving upvoted posts of user {self.args.user}') generators.append(self.reddit_instance.redditor(self.args.user).upvoted(limit=self.args.limit)) if self.args.saved: + logger.debug(f'Retrieving saved posts of user {self.args.user}') generators.append(self.reddit_instance.redditor(self.args.user).saved(limit=self.args.limit)) return generators else: @@ -277,11 +282,11 @@ class RedditDownloader: def download(self): for generator in self.reddit_lists: for submission in generator: + logger.debug(f'Attempting to download submission {submission.id}') self._download_submission(submission) def _download_submission(self, submission: praw.models.Submission): if self.download_filter.check_url(submission.url): - logger.debug(f'Attempting to download submission {submission.id}') try: downloader_class = DownloadFactory.pull_lever(submission.url) @@ -293,11 +298,12 @@ class RedditDownloader: content = 
downloader.find_resources(self.authenticator) for destination, res in self.file_name_formatter.format_resource_paths(content, self.download_directory): if destination.exists(): - logger.debug(f'File already exists: {destination}') + logger.warning(f'File already exists: {destination}') else: res.download() if res.hash.hexdigest() in self.master_hash_list and self.args.no_dupes: - logger.debug(f'Resource from {res.url} downloaded elsewhere') + logger.warning( + f'Resource from "{res.url}" and hash "{res.hash.hexdigest()}" downloaded elsewhere') else: # TODO: consider making a hard link/symlink here destination.parent.mkdir(parents=True, exist_ok=True) diff --git a/bulkredditdownloader/file_name_formatter.py b/bulkredditdownloader/file_name_formatter.py index 3575b00..5be0213 100644 --- a/bulkredditdownloader/file_name_formatter.py +++ b/bulkredditdownloader/file_name_formatter.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # coding=utf-8 +import logging import re from pathlib import Path from typing import Optional @@ -10,6 +11,8 @@ import praw.models from bulkredditdownloader.exceptions import BulkDownloaderException from bulkredditdownloader.resource import Resource +logger = logging.getLogger(__name__) + class FileNameFormatter: key_terms = ('title', 'subreddit', 'redditor', 'postid', 'upvotes', 'flair', 'date') @@ -35,6 +38,7 @@ class FileNameFormatter: for key in submission_attributes.keys(): if re.search(r'(?i).*{{{}}}.*'.format(key), result): result = re.sub(r'(?i){{{}}}'.format(key), str(submission_attributes.get(key, 'unknown')), result) + logger.log(9, f'Found key string {key} in name') result = result.replace('/', '') return result @@ -42,14 +46,18 @@ class FileNameFormatter: def _format_path(self, resource: Resource, destination_directory: Path, index: Optional[int] = None) -> Path: subfolder = destination_directory / self._format_name(resource.source_submission, self.directory_format_string) index = f'_{str(index)}' if index else '' - file_path = subfolder / 
(str(self._format_name(resource.source_submission, - self.file_format_string)) + index + resource.extension) + try: + file_path = subfolder / (str(self._format_name(resource.source_submission, + self.file_format_string)) + index + resource.extension) + except TypeError: + raise BulkDownloaderException(f'Could not determine path name: {subfolder}, {index}, {resource.extension}') return file_path def format_resource_paths(self, resources: list[Resource], destination_directory: Path) -> list[tuple[Path, Resource]]: out = [] for i, res in enumerate(resources, start=1): + logger.log(9, f'Formatting filename with index {i}') out.append((self._format_path(res, destination_directory, i), res)) return out diff --git a/bulkredditdownloader/oauth2.py b/bulkredditdownloader/oauth2.py index a29d907..9678b45 100644 --- a/bulkredditdownloader/oauth2.py +++ b/bulkredditdownloader/oauth2.py @@ -70,12 +70,12 @@ class OAuth2Authenticator: server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) server.bind(('localhost', 7634)) - logger.debug('Server listening on localhost:7634') + logger.log(9, 'Server listening on localhost:7634') server.listen(1) client = server.accept()[0] server.close() - logger.debug('Server closed') + logger.log(9, 'Server closed') return client @@ -95,7 +95,7 @@ class OAuth2TokenManager(praw.reddit.BaseTokenManager): if authorizer.refresh_token is None: if self.config.has_option('DEFAULT', 'user_token'): authorizer.refresh_token = self.config.get('DEFAULT', 'user_token') - logger.debug('Loaded OAuth2 token for authoriser') + logger.log(9, 'Loaded OAuth2 token for authoriser') else: raise RedditAuthenticationError('No auth token loaded in configuration') @@ -103,4 +103,4 @@ class OAuth2TokenManager(praw.reddit.BaseTokenManager): self.config.set('DEFAULT', 'user_token', authorizer.refresh_token) with open(self.config_location, 'w') as file: self.config.write(file, True) - logger.debug(f'Written 
OAuth2 token from authoriser to {self.config_location}') + logger.log(9, f'Written OAuth2 token from authoriser to {self.config_location}') diff --git a/bulkredditdownloader/resource.py b/bulkredditdownloader/resource.py index 30cbd3d..a93cc0c 100644 --- a/bulkredditdownloader/resource.py +++ b/bulkredditdownloader/resource.py @@ -2,6 +2,7 @@ # coding=utf-8 import hashlib +import logging import re import time from typing import Optional @@ -12,6 +13,8 @@ from praw.models import Submission from bulkredditdownloader.exceptions import BulkDownloaderException +logger = logging.getLogger(__name__) + class Resource: def __init__(self, source_submission: Submission, url: str, extension: str = None): @@ -32,10 +35,12 @@ class Resource: else: raise requests.exceptions.ConnectionError except requests.exceptions.ConnectionError: + logger.log(9, f'Error occurred downloading resource, waiting {wait_time} seconds') time.sleep(wait_time) if wait_time < 300: return Resource.retry_download(url, wait_time + 60) else: + logger.error(f'Max wait time exceeded for resource at url {url}') return None def download(self):