From 91c46411990147fa9db4a0b35a3a195bad78673f Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 12 Feb 2024 21:26:34 -0800 Subject: [PATCH] skip dir size calculation when path is too long --- archivebox/system.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/archivebox/system.py b/archivebox/system.py index d80a2cb5..6e03846f 100644 --- a/archivebox/system.py +++ b/archivebox/system.py @@ -146,20 +146,24 @@ def get_dir_size(path: Union[str, Path], recursive: bool=True, pattern: Optional recursively and limiting to a given filter list """ num_bytes, num_dirs, num_files = 0, 0, 0 - for entry in os.scandir(path): - if (pattern is not None) and (pattern not in entry.path): - continue - if entry.is_dir(follow_symlinks=False): - if not recursive: + try: + for entry in os.scandir(path): + if (pattern is not None) and (pattern not in entry.path): continue - num_dirs += 1 - bytes_inside, dirs_inside, files_inside = get_dir_size(entry.path) - num_bytes += bytes_inside - num_dirs += dirs_inside - num_files += files_inside - else: - num_bytes += entry.stat(follow_symlinks=False).st_size - num_files += 1 + if entry.is_dir(follow_symlinks=False): + if not recursive: + continue + num_dirs += 1 + bytes_inside, dirs_inside, files_inside = get_dir_size(entry.path) + num_bytes += bytes_inside + num_dirs += dirs_inside + num_files += files_inside + else: + num_bytes += entry.stat(follow_symlinks=False).st_size + num_files += 1 + except OSError: + # e.g. FileNameTooLong or other error while trying to read dir + pass return num_bytes, num_dirs, num_files