1
0
Fork 0
mirror of synced 2024-06-01 18:09:47 +12:00

Allow multilevel folder schemes (#251)

* Improve some formatting

* Allow multilevel folder names
This commit is contained in:
Serene 2021-04-04 16:16:06 +10:00 committed by Ali Parlakci
parent dc9d02a28c
commit 702295f1ea
2 changed files with 49 additions and 12 deletions

View file

@ -16,13 +16,21 @@ logger = logging.getLogger(__name__)
class FileNameFormatter:
key_terms = ('title', 'subreddit', 'redditor', 'postid', 'upvotes', 'flair', 'date')
key_terms = (
'date',
'flair',
'postid',
'redditor',
'subreddit',
'title',
'upvotes',
)
def __init__(self, file_format_string: str, directory_format_string: str):
if not self.validate_string(file_format_string):
raise BulkDownloaderException(f'"{file_format_string}" is not a valid format string')
self.file_format_string = file_format_string
self.directory_format_string = directory_format_string
self.directory_format_string: list[str] = directory_format_string.split('/')
@staticmethod
def _format_name(submission: (Comment, Submission), format_string: str) -> str:
@ -34,8 +42,8 @@ class FileNameFormatter:
raise BulkDownloaderException(f'Cannot name object {type(submission).__name__}')
result = format_string
for key in attributes.keys():
if re.search(r'(?i).*{{{}}}.*'.format(key), result):
result = re.sub(r'(?i){{{}}}'.format(key), str(attributes.get(key, 'unknown')), result)
if re.search(fr'(?i).*{{{key}}}.*', result):
result = re.sub(fr'(?i){{{key}}}', str(attributes.get(key, 'unknown')), result)
logger.log(9, f'Found key string {key} in name')
result = result.replace('/', '')
@ -67,7 +75,7 @@ class FileNameFormatter:
'postid': comment.id,
'upvotes': comment.score,
'flair': '',
'date': comment.created_utc
'date': comment.created_utc,
}
return comment_attributes
@ -75,8 +83,12 @@ class FileNameFormatter:
self,
resource: Resource,
destination_directory: Path,
index: Optional[int] = None) -> Path:
subfolder = destination_directory / self._format_name(resource.source_submission, self.directory_format_string)
index: Optional[int] = None,
) -> Path:
subfolder = Path(
destination_directory,
*[self._format_name(resource.source_submission, part) for part in self.directory_format_string]
)
index = f'_{str(index)}' if index else ''
if not resource.extension:
raise BulkDownloaderException(f'Resource from {resource.url} has no extension')
@ -102,8 +114,11 @@ class FileNameFormatter:
filename = filename[:-1]
return filename + ending
def format_resource_paths(self, resources: list[Resource],
destination_directory: Path) -> list[tuple[Path, Resource]]:
def format_resource_paths(
self,
resources: list[Resource],
destination_directory: Path,
) -> list[tuple[Path, Resource]]:
out = []
if len(resources) == 1:
out.append((self.format_path(resources[0], destination_directory, None), resources[0]))
@ -121,7 +136,8 @@ class FileNameFormatter:
if result:
if 'POSTID' not in test_string:
logger.warning(
f'Post ID not included in this file scheme, so file names are not guaranteed to be unique')
'Some files might not be downloaded due to name conflicts as filenames are'
' not guaranteed to be be unique without {POSTID}')
return True
else:
return False

View file

@ -26,7 +26,7 @@ def submission() -> MagicMock:
return test
@pytest.fixture()
@pytest.fixture(scope='session')
def reddit_submission(reddit_instance: praw.Reddit) -> praw.models.Submission:
return reddit_instance.submission(id='lgilgt')
@ -267,9 +267,30 @@ def test_format_archive_entry_comment(
test_comment_id: str,
expected_name: str,
tmp_path: Path,
reddit_instance: praw.Reddit):
reddit_instance: praw.Reddit,
):
test_comment = reddit_instance.comment(id=test_comment_id)
test_formatter = FileNameFormatter(test_file_scheme, test_folder_scheme)
test_entry = Resource(test_comment, '', '.json')
result = test_formatter.format_path(test_entry, tmp_path)
assert result.name == expected_name
@pytest.mark.parametrize(('test_folder_scheme', 'expected'), (
    ('{REDDITOR}/{SUBREDDIT}', 'person/randomreddit'),
    ('{POSTID}/{SUBREDDIT}/{REDDITOR}', '12345/randomreddit/person'),
))
def test_multilevel_folder_scheme(
        test_folder_scheme: str,
        expected: str,
        tmp_path: Path,
        submission: MagicMock,
):
    """Check that a '/'-separated folder scheme yields one directory per part.

    The formatter is built with a ``{POSTID}`` file scheme and the
    parametrized folder scheme, then asked to format a path for a mocked
    resource under ``tmp_path``. The expected directory names presumably
    mirror the attribute values set on the ``submission`` fixture (defined
    elsewhere in this module).
    """
    test_formatter = FileNameFormatter('{POSTID}', test_folder_scheme)

    # A bare mock is enough: only the source submission and the extension
    # are set here for format_path to consume.
    test_resource = MagicMock()
    test_resource.source_submission = submission
    test_resource.extension = '.png'

    result = test_formatter.format_path(test_resource, tmp_path)
    # Drop the temporary root so only the generated sub-path is inspected.
    result = result.relative_to(tmp_path)

    # Compare in POSIX form: str() of a path uses the OS separator, which
    # would never equal the '/'-joined expectation on Windows.
    assert result.parent.as_posix() == expected
    # One parent per folder level, plus the trailing '.' that the parents
    # of a relative path always end with.
    assert len(result.parents) == (len(expected.split('/')) + 1)