Allow multilevel folder schemes (#251)
* Improve some formatting * Allow multilevel folder names
This commit is contained in:
parent
dc9d02a28c
commit
702295f1ea
|
@ -16,13 +16,21 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
|
||||
class FileNameFormatter:
|
||||
key_terms = ('title', 'subreddit', 'redditor', 'postid', 'upvotes', 'flair', 'date')
|
||||
key_terms = (
|
||||
'date',
|
||||
'flair',
|
||||
'postid',
|
||||
'redditor',
|
||||
'subreddit',
|
||||
'title',
|
||||
'upvotes',
|
||||
)
|
||||
|
||||
def __init__(self, file_format_string: str, directory_format_string: str):
    """Store the naming schemes used to build download paths.

    Args:
        file_format_string: Scheme for file names; it must pass
            ``validate_string``.
        directory_format_string: Scheme for the destination folder. ``/``
            separates nesting levels, e.g. ``'{REDDITOR}/{SUBREDDIT}'``.

    Raises:
        BulkDownloaderException: If ``file_format_string`` fails validation.
    """
    if not self.validate_string(file_format_string):
        raise BulkDownloaderException(f'"{file_format_string}" is not a valid format string')
    self.file_format_string = file_format_string
    # Stored as one entry per folder level; the raw string is no longer
    # assigned first, since that value was overwritten immediately.
    self.directory_format_string: list[str] = directory_format_string.split('/')
|
||||
|
||||
@staticmethod
|
||||
def _format_name(submission: (Comment, Submission), format_string: str) -> str:
|
||||
|
@ -34,8 +42,8 @@ class FileNameFormatter:
|
|||
raise BulkDownloaderException(f'Cannot name object {type(submission).__name__}')
|
||||
result = format_string
|
||||
for key in attributes.keys():
|
||||
if re.search(r'(?i).*{{{}}}.*'.format(key), result):
|
||||
result = re.sub(r'(?i){{{}}}'.format(key), str(attributes.get(key, 'unknown')), result)
|
||||
if re.search(fr'(?i).*{{{key}}}.*', result):
|
||||
result = re.sub(fr'(?i){{{key}}}', str(attributes.get(key, 'unknown')), result)
|
||||
logger.log(9, f'Found key string {key} in name')
|
||||
|
||||
result = result.replace('/', '')
|
||||
|
@ -67,7 +75,7 @@ class FileNameFormatter:
|
|||
'postid': comment.id,
|
||||
'upvotes': comment.score,
|
||||
'flair': '',
|
||||
'date': comment.created_utc
|
||||
'date': comment.created_utc,
|
||||
}
|
||||
return comment_attributes
|
||||
|
||||
|
@ -75,8 +83,12 @@ class FileNameFormatter:
|
|||
self,
|
||||
resource: Resource,
|
||||
destination_directory: Path,
|
||||
index: Optional[int] = None) -> Path:
|
||||
subfolder = destination_directory / self._format_name(resource.source_submission, self.directory_format_string)
|
||||
index: Optional[int] = None,
|
||||
) -> Path:
|
||||
subfolder = Path(
|
||||
destination_directory,
|
||||
*[self._format_name(resource.source_submission, part) for part in self.directory_format_string]
|
||||
)
|
||||
index = f'_{str(index)}' if index else ''
|
||||
if not resource.extension:
|
||||
raise BulkDownloaderException(f'Resource from {resource.url} has no extension')
|
||||
|
@ -102,8 +114,11 @@ class FileNameFormatter:
|
|||
filename = filename[:-1]
|
||||
return filename + ending
|
||||
|
||||
def format_resource_paths(self, resources: list[Resource],
|
||||
destination_directory: Path) -> list[tuple[Path, Resource]]:
|
||||
def format_resource_paths(
|
||||
self,
|
||||
resources: list[Resource],
|
||||
destination_directory: Path,
|
||||
) -> list[tuple[Path, Resource]]:
|
||||
out = []
|
||||
if len(resources) == 1:
|
||||
out.append((self.format_path(resources[0], destination_directory, None), resources[0]))
|
||||
|
@ -121,7 +136,8 @@ class FileNameFormatter:
|
|||
if result:
|
||||
if 'POSTID' not in test_string:
|
||||
logger.warning(
|
||||
f'Post ID not included in this file scheme, so file names are not guaranteed to be unique')
|
||||
'Some files might not be downloaded due to name conflicts as filenames are'
|
||||
' not guaranteed to be be unique without {POSTID}')
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
|
|
@ -26,7 +26,7 @@ def submission() -> MagicMock:
|
|||
return test
|
||||
|
||||
|
||||
# Only one fixture decorator is kept: pytest rejects a function that has
# ``pytest.fixture`` applied to it more than once.
# NOTE(review): session scope needs ``reddit_instance`` to be session-scoped
# (or broader) as well -- confirm against its definition.
@pytest.fixture(scope='session')
def reddit_submission(reddit_instance: praw.Reddit) -> praw.models.Submission:
    """Return the submission with id ``lgilgt``, shared across the test session."""
    return reddit_instance.submission(id='lgilgt')
|
||||
|
||||
|
@ -267,9 +267,30 @@ def test_format_archive_entry_comment(
|
|||
test_comment_id: str,
|
||||
expected_name: str,
|
||||
tmp_path: Path,
|
||||
reddit_instance: praw.Reddit):
|
||||
reddit_instance: praw.Reddit,
|
||||
):
|
||||
test_comment = reddit_instance.comment(id=test_comment_id)
|
||||
test_formatter = FileNameFormatter(test_file_scheme, test_folder_scheme)
|
||||
test_entry = Resource(test_comment, '', '.json')
|
||||
result = test_formatter.format_path(test_entry, tmp_path)
|
||||
assert result.name == expected_name
|
||||
|
||||
|
||||
@pytest.mark.parametrize(('test_folder_scheme', 'expected'), (
    ('{REDDITOR}/{SUBREDDIT}', 'person/randomreddit'),
    ('{POSTID}/{SUBREDDIT}/{REDDITOR}', '12345/randomreddit/person'),
))
def test_multilevel_folder_scheme(
        test_folder_scheme: str,
        expected: str,
        tmp_path: Path,
        submission: MagicMock,
):
    """Check that a ``/``-separated folder scheme yields nested directories.

    The formatted path is made relative to ``tmp_path`` so that only the
    directories produced by the folder scheme are compared.
    """
    test_formatter = FileNameFormatter('{POSTID}', test_folder_scheme)
    test_resource = MagicMock()
    test_resource.source_submission = submission
    test_resource.extension = '.png'
    result = test_formatter.format_path(test_resource, tmp_path)
    result = result.relative_to(tmp_path)
    # as_posix() keeps the comparison separator-independent; str() would use
    # backslashes on Windows and never match the '/'-joined expectation.
    assert result.parent.as_posix() == expected
    # A relative path's ``parents`` also contains '.', hence the extra one.
    assert len(result.parents) == (len(expected.split('/')) + 1)
|
||||
|
|
Loading…
Reference in a new issue