Add a function to calculate hashes for all existing files, if requested
This commit is contained in:
parent
f941161014
commit
6d6327a396
4 changed files with 24 additions and 1 deletions
|
@ -39,6 +39,7 @@ def cli():
|
||||||
@click.option('--set-folder-scheme', default=None, type=str)
|
@click.option('--set-folder-scheme', default=None, type=str)
|
||||||
@click.option('--no-dupes', is_flag=True, default=None)
|
@click.option('--no-dupes', is_flag=True, default=None)
|
||||||
@click.option('--config', type=str, default=None)
|
@click.option('--config', type=str, default=None)
|
||||||
|
@click.option('--search-existing', is_flag=True, default=None)
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
def cli_download(context: click.Context, **_):
|
def cli_download(context: click.Context, **_):
|
||||||
config = Configuration()
|
config = Configuration()
|
||||||
|
|
|
@ -19,6 +19,7 @@ class Configuration(Namespace):
|
||||||
self.no_dupes: bool = False
|
self.no_dupes: bool = False
|
||||||
self.saved: bool = False
|
self.saved: bool = False
|
||||||
self.search: Optional[str] = None
|
self.search: Optional[str] = None
|
||||||
|
self.search_existing: bool = False
|
||||||
self.set_file_scheme: str = '{REDDITOR}_{TITLE}_{POSTID}'
|
self.set_file_scheme: str = '{REDDITOR}_{TITLE}_{POSTID}'
|
||||||
self.set_folder_scheme: str = '{SUBREDDIT}'
|
self.set_folder_scheme: str = '{SUBREDDIT}'
|
||||||
self.skip: list[str] = []
|
self.skip: list[str] = []
|
||||||
|
|
|
@ -2,7 +2,9 @@
|
||||||
# coding=utf-8
|
# coding=utf-8
|
||||||
|
|
||||||
import configparser
|
import configparser
|
||||||
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
@ -116,7 +118,7 @@ class RedditDownloader:
|
||||||
return master_list
|
return master_list
|
||||||
|
|
||||||
def _determine_directories(self):
|
def _determine_directories(self):
|
||||||
self.download_directory = Path(self.args.directory)
|
self.download_directory = Path(self.args.directory).resolve().expanduser()
|
||||||
self.logfile_directory = self.download_directory / 'LOG_FILES'
|
self.logfile_directory = self.download_directory / 'LOG_FILES'
|
||||||
self.config_directory = self.config_directories.user_config_dir
|
self.config_directory = self.config_directories.user_config_dir
|
||||||
|
|
||||||
|
@ -313,3 +315,15 @@ class RedditDownloader:
|
||||||
self.master_hash_list.append(res.hash.hexdigest())
|
self.master_hash_list.append(res.hash.hexdigest())
|
||||||
logger.debug(f'Hash added to master list: {res.hash.hexdigest()}')
|
logger.debug(f'Hash added to master list: {res.hash.hexdigest()}')
|
||||||
logger.info(f'Downloaded submission {submission.name}')
|
logger.info(f'Downloaded submission {submission.name}')
|
||||||
|
|
||||||
|
def scan_existing_files(self) -> list[str]:
    """Walk the download directory and return the MD5 hash of every file.

    Used to seed the master hash list so files that already exist on disk
    are recognised as duplicates and not downloaded again.

    Returns:
        A list of lowercase hexadecimal MD5 digests, one per file found
        anywhere under ``self.download_directory``.
    """
    files = []
    for (dirpath, _dirnames, filenames) in os.walk(self.download_directory):
        files.extend([Path(dirpath, filename) for filename in filenames])
    logger.info(f'Calculating hashes for {len(files)} files')
    hash_list = []
    for existing_file in files:
        md5 = hashlib.md5()
        # Hash in fixed-size chunks so large media files are never loaded
        # into memory whole (the original read() pulled in the entire file).
        with open(existing_file, 'rb') as file_handle:
            for chunk in iter(lambda: file_handle.read(1024 * 1024), b''):
                md5.update(chunk)
        hash_list.append(md5.hexdigest())
        logger.log(9, f'Hash calculated for file at {existing_file}')
    return hash_list
|
||||||
|
|
|
@ -407,3 +407,10 @@ def test_download_submission_hash_exists(
|
||||||
def test_sanitise_subreddit_name(test_name: str, expected: str):
|
def test_sanitise_subreddit_name(test_name: str, expected: str):
|
||||||
result = RedditDownloader._sanitise_subreddit_name(test_name)
|
result = RedditDownloader._sanitise_subreddit_name(test_name)
|
||||||
assert result == expected
|
assert result == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_search_existing_files(downloader_mock: MagicMock):
    # Point the mocked downloader at the current working directory and check
    # that scanning produces a reasonably sized list of string hashes.
    downloader_mock.download_directory = Path('.').resolve().expanduser()
    hashes = RedditDownloader.scan_existing_files(downloader_mock)
    assert all(isinstance(entry, str) for entry in hashes)
    assert len(hashes) >= 40
|
||||||
|
|
Loading…
Reference in a new issue