1
0
Fork 0
mirror of synced 2024-06-30 12:00:48 +12:00

Add path limit fix

This commit is contained in:
Serene-Arc 2021-05-18 12:39:08 +10:00
parent 346df4726d
commit 9a1e1ebea1
2 changed files with 53 additions and 21 deletions

View file

@ -4,6 +4,7 @@ import datetime
import logging import logging
import platform import platform
import re import re
import subprocess
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
@ -104,32 +105,46 @@ class FileNameFormatter:
) -> Path: ) -> Path:
subfolder = Path( subfolder = Path(
destination_directory, destination_directory,
*[self._format_name(resource.source_submission, part) for part in self.directory_format_string] *[self._format_name(resource.source_submission, part) for part in self.directory_format_string],
) )
index = f'_{str(index)}' if index else '' index = f'_{str(index)}' if index else ''
if not resource.extension: if not resource.extension:
raise BulkDownloaderException(f'Resource from {resource.url} has no extension') raise BulkDownloaderException(f'Resource from {resource.url} has no extension')
ending = index + resource.extension ending = index + resource.extension
file_name = str(self._format_name(resource.source_submission, self.file_format_string)) file_name = str(self._format_name(resource.source_submission, self.file_format_string))
file_name = self._limit_file_name_length(file_name, ending)
try: try:
file_path = Path(subfolder, file_name) file_path = self._limit_file_name_length(file_name, ending, subfolder)
except TypeError: except TypeError:
raise BulkDownloaderException(f'Could not determine path name: {subfolder}, {index}, {resource.extension}') raise BulkDownloaderException(f'Could not determine path name: {subfolder}, {index}, {resource.extension}')
return file_path return file_path
@staticmethod @staticmethod
def _limit_file_name_length(filename: str, ending: str) -> str: def _limit_file_name_length(filename: str, ending: str, root: Path) -> Path:
root = root.resolve().expanduser()
possible_id = re.search(r'((?:_\w{6})?$)', filename) possible_id = re.search(r'((?:_\w{6})?$)', filename)
if possible_id: if possible_id:
ending = possible_id.group(1) + ending ending = possible_id.group(1) + ending
filename = filename[:possible_id.start()] filename = filename[:possible_id.start()]
max_path = FileNameFormatter.find_max_path_length()
max_length_chars = 255 - len(ending) max_length_chars = 255 - len(ending)
max_length_bytes = 255 - len(ending.encode('utf-8')) max_length_bytes = 255 - len(ending.encode('utf-8'))
while len(filename) > max_length_chars or len(filename.encode('utf-8')) > max_length_bytes: max_path_length = max_path - len(ending) - len(str(root)) - 1
while len(filename) > max_length_chars or \
len(filename.encode('utf-8')) > max_length_bytes or \
len(filename) > max_path_length:
filename = filename[:-1] filename = filename[:-1]
return filename + ending return Path(root, filename + ending)
@staticmethod
def find_max_path_length() -> int:
try:
return int(subprocess.check_output(['getconf', 'PATH_MAX', '/']))
except (ValueError, subprocess.CalledProcessError, OSError):
if platform.system() == 'Windows':
return 260
else:
return 4096
def format_resource_paths( def format_resource_paths(
self, self,

View file

@ -1,11 +1,12 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# coding=utf-8 # coding=utf-8
import platform
import unittest.mock
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
from unittest.mock import MagicMock from unittest.mock import MagicMock
import platform
import praw.models import praw.models
import pytest import pytest
@ -28,10 +29,10 @@ def submission() -> MagicMock:
return test return test
def do_test_string_equality(result: str, expected: str) -> bool: def do_test_string_equality(result: [Path, str], expected: str) -> bool:
if platform.system() == 'Windows': if platform.system() == 'Windows':
expected = FileNameFormatter._format_for_windows(expected) expected = FileNameFormatter._format_for_windows(expected)
return expected == result return str(result).endswith(expected)
def do_test_path_equality(result: Path, expected: str) -> bool: def do_test_path_equality(result: Path, expected: str) -> bool:
@ -41,7 +42,7 @@ def do_test_path_equality(result: Path, expected: str) -> bool:
expected = Path(*expected) expected = Path(*expected)
else: else:
expected = Path(expected) expected = Path(expected)
return result == expected return str(result).endswith(str(expected))
@pytest.fixture(scope='session') @pytest.fixture(scope='session')
@ -173,7 +174,9 @@ def test_format_multiple_resources():
test_formatter = FileNameFormatter('{TITLE}', '', 'ISO') test_formatter = FileNameFormatter('{TITLE}', '', 'ISO')
results = test_formatter.format_resource_paths(mocks, Path('.')) results = test_formatter.format_resource_paths(mocks, Path('.'))
results = set([str(res[0]) for res in results]) results = set([str(res[0]) for res in results])
assert results == {'test_1.png', 'test_2.png', 'test_3.png', 'test_4.png'} expected = set([str(Path(Path('.'), name).resolve())
for name in ('test_1.png', 'test_2.png', 'test_3.png', 'test_4.png')])
assert results == expected
@pytest.mark.parametrize(('test_filename', 'test_ending'), ( @pytest.mark.parametrize(('test_filename', 'test_ending'), (
@ -183,10 +186,11 @@ def test_format_multiple_resources():
('😍💕✨' * 100, '_1.png'), ('😍💕✨' * 100, '_1.png'),
)) ))
def test_limit_filename_length(test_filename: str, test_ending: str): def test_limit_filename_length(test_filename: str, test_ending: str):
result = FileNameFormatter._limit_file_name_length(test_filename, test_ending) result = FileNameFormatter._limit_file_name_length(test_filename, test_ending, Path('.'))
assert len(result) <= 255 assert len(result.name) <= 255
assert len(result.encode('utf-8')) <= 255 assert len(result.name.encode('utf-8')) <= 255
assert isinstance(result, str) assert len(str(result)) <= FileNameFormatter.find_max_path_length()
assert isinstance(result, Path)
@pytest.mark.parametrize(('test_filename', 'test_ending', 'expected_end'), ( @pytest.mark.parametrize(('test_filename', 'test_ending', 'expected_end'), (
@ -201,11 +205,11 @@ def test_limit_filename_length(test_filename: str, test_ending: str):
('😍💕✨' * 100 + '_aaa1aa', '_1.png', '_aaa1aa_1.png'), ('😍💕✨' * 100 + '_aaa1aa', '_1.png', '_aaa1aa_1.png'),
)) ))
def test_preserve_id_append_when_shortening(test_filename: str, test_ending: str, expected_end: str): def test_preserve_id_append_when_shortening(test_filename: str, test_ending: str, expected_end: str):
result = FileNameFormatter._limit_file_name_length(test_filename, test_ending) result = FileNameFormatter._limit_file_name_length(test_filename, test_ending, Path('.'))
assert len(result) <= 255 assert len(result.name) <= 255
assert len(result.encode('utf-8')) <= 255 assert len(result.name.encode('utf-8')) <= 255
assert isinstance(result, str) assert result.name.endswith(expected_end)
assert result.endswith(expected_end) assert len(str(result)) <= FileNameFormatter.find_max_path_length()
def test_shorten_filenames(submission: MagicMock, tmp_path: Path): def test_shorten_filenames(submission: MagicMock, tmp_path: Path):
@ -295,7 +299,7 @@ def test_format_archive_entry_comment(
test_formatter = FileNameFormatter(test_file_scheme, test_folder_scheme, 'ISO') test_formatter = FileNameFormatter(test_file_scheme, test_folder_scheme, 'ISO')
test_entry = Resource(test_comment, '', '.json') test_entry = Resource(test_comment, '', '.json')
result = test_formatter.format_path(test_entry, tmp_path) result = test_formatter.format_path(test_entry, tmp_path)
assert do_test_string_equality(result.name, expected_name) assert do_test_string_equality(result, expected_name)
@pytest.mark.parametrize(('test_folder_scheme', 'expected'), ( @pytest.mark.parametrize(('test_folder_scheme', 'expected'), (
@ -364,3 +368,16 @@ def test_time_string_formats(test_time_format: str, expected: str):
test_formatter = FileNameFormatter('{TITLE}', '', test_time_format) test_formatter = FileNameFormatter('{TITLE}', '', test_time_format)
result = test_formatter._convert_timestamp(test_time.timestamp()) result = test_formatter._convert_timestamp(test_time.timestamp())
assert result == expected assert result == expected
def test_get_max_path_length():
result = FileNameFormatter.find_max_path_length()
assert result in (4096, 260)
def test_windows_max_path():
with unittest.mock.patch('platform.system', return_value='Windows'):
with unittest.mock.patch('bdfr.file_name_formatter.FileNameFormatter.find_max_path_length', return_value=260):
result = FileNameFormatter._limit_file_name_length('test' * 50, '_1.png', Path('test' * 25))
assert len(str(result)) <= 260
assert len(result.name) <= 75