NOTE(review): _convert_unicode_escapes and its tests were revised after this diff was generated,
so the post-image blob ids on the "index" lines below are stale; regenerate the patch with
`git diff` before relying on them. Whitespace of context lines was restored by hand - verify.

diff --git a/bdfr/file_name_formatter.py b/bdfr/file_name_formatter.py
index d6bdace..9e7b335 100644
--- a/bdfr/file_name_formatter.py
+++ b/bdfr/file_name_formatter.py
@@ -44,7 +44,7 @@ class FileNameFormatter:
         for key in attributes.keys():
             if re.search(fr'(?i).*{{{key}}}.*', result):
                 key_value = str(attributes.get(key, 'unknown'))
-                key_value = bytes(key_value, 'utf-8').decode('unicode-escape')
+                key_value = FileNameFormatter._convert_unicode_escapes(key_value)
                 result = re.sub(fr'(?i){{{key}}}', key_value, result,)
                 logger.log(9, f'Found key string {key} in name')
 
@@ -55,6 +55,16 @@ class FileNameFormatter:
 
         return result
 
+    @staticmethod
+    def _convert_unicode_escapes(in_string: str) -> str:
+        """Replace literal backslash-u escapes (four hex digits) with the characters they encode."""
+        # Hex digits are matched because code points such as 201c contain letters, and re.sub
+        # rewrites every escape rather than only the first one found.
+        converted = re.sub(r'\\u([0-9a-fA-F]{4})', lambda match: chr(int(match.group(1), 16)), in_string)
+        # Escaped surrogate halves (emoji written as two escapes) are rejoined by a UTF-16 round
+        # trip; 'surrogatepass' keeps any unpaired half instead of raising.
+        return converted.encode('utf-16-le', 'surrogatepass').decode('utf-16-le', 'surrogatepass')
+
     @staticmethod
     def _generate_name_dict_from_submission(submission: Submission) -> dict:
         submission_attributes = {
diff --git a/bdfr/tests/test_file_name_formatter.py b/bdfr/tests/test_file_name_formatter.py
index 96252e4..fe8372a 100644
--- a/bdfr/tests/test_file_name_formatter.py
+++ b/bdfr/tests/test_file_name_formatter.py
@@ -297,12 +297,25 @@ def test_multilevel_folder_scheme(
     assert len(result.parents) == (len(expected.split('/')) + 1)
 
 
-@pytest.mark.online
-@pytest.mark.reddit
-@pytest.mark.parametrize(('test_submission_id', 'test_file_scheme', 'expected'), (
-    ('mecwk7', '{TITLE}', 'My cat’s paws are so cute'),  # Unicode escape in title
+@pytest.mark.parametrize(('test_name_string', 'expected'), (
+    ('test', 'test'),
+    ('😍', '😍'),
+    ('test😍', 'test😍'),
+    ('test😍 ’', 'test😍 ’'),
+    ('test😍 \\u2019', 'test😍 ’'),
 ))
-def test_edge_case_names(test_submission_id: str, test_file_scheme: str, expected: str, reddit_instance: praw.Reddit):
-    test_submission = reddit_instance.submission(id=test_submission_id)
-    result = FileNameFormatter._format_name(test_submission, test_file_scheme)
+def test_preserve_emojis(test_name_string: str, expected: str, submission: MagicMock):
+    submission.title = test_name_string
+    result = FileNameFormatter._format_name(submission, '{TITLE}')
+    assert result == expected
+
+
+@pytest.mark.parametrize(('test_string', 'expected'), (
+    ('test \\u2019', 'test ’'),
+    ('My cat\\u2019s paws are so cute', 'My cat’s paws are so cute'),
+    ('\\u201cquoted\\u201d', '“quoted”'),
+    ('\\ud83d\\ude0d', '😍'),
+))
+def test_convert_unicode_escapes(test_string: str, expected: str):
+    result = FileNameFormatter._convert_unicode_escapes(test_string)
     assert result == expected