From 11125265434bb46a60c389f13a45b0ef0a6e8298 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 31 May 2021 19:31:11 -0400 Subject: [PATCH] add option ENFORCE_ATOMIC_WRITES to allow disabling forced FSYNC writes on network drives --- archivebox/config.py | 1 + archivebox/system.py | 24 +++++++++++++++++------- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/archivebox/config.py b/archivebox/config.py index 6f031faf..45dee650 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -77,6 +77,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = { 'OUTPUT_PERMISSIONS': {'type': str, 'default': '755'}, 'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'}, 'URL_BLACKLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$'}, # to avoid downloading code assets as their own pages + 'ENFORCE_ATOMIC_WRITES': {'type': bool, 'default': True}, }, 'SERVER_CONFIG': { diff --git a/archivebox/system.py b/archivebox/system.py index 698c89f6..91a51a21 100644 --- a/archivebox/system.py +++ b/archivebox/system.py @@ -14,7 +14,7 @@ from crontab import CronTab from .vendor.atomicwrites import atomic_write as lib_atomic_write from .util import enforce_types, ExtendedEncoder -from .config import PYTHON_BINARY, OUTPUT_PERMISSIONS +from .config import PYTHON_BINARY, OUTPUT_PERMISSIONS, ENFORCE_ATOMIC_WRITES @@ -78,7 +78,7 @@ def run(cmd, *args, input=None, capture_output=True, timeout=None, check=False, @enforce_types -def atomic_write(path: Union[Path, str], contents: Union[dict, str, bytes], overwrite: bool=True) -> None: +def atomic_write(path: Union[Path, str], contents: Union[dict, str, bytes], overwrite: bool=True, permissions: str=OUTPUT_PERMISSIONS) -> None: """Safe atomic write to filesystem by writing to temp file + atomic rename""" mode = 'wb+' if isinstance(contents, bytes) else 'w' @@ -92,11 +92,21 @@ def atomic_write(path: Union[Path, str], contents: Union[dict, str, bytes], over elif isinstance(contents, (bytes, str)): f.write(contents) except OSError as e: - print(f"[X] OSError: Failed to write {path} with fcntl.F_FULLFSYNC. ({e})") - print(" You can store the archive/ subfolder on a hard drive or network share that doesn't support support syncronous writes,") - print(" but the main folder containing the index.sqlite3 and ArchiveBox.conf files must be on a filesystem that supports FSYNC.") - raise SystemExit(1) - os.chmod(path, int(OUTPUT_PERMISSIONS, base=8)) + if ENFORCE_ATOMIC_WRITES: + print(f"[X] OSError: Failed to write {path} with fcntl.F_FULLFSYNC. ({e})") + print(" You can store the archive/ subfolder on a hard drive or network share that doesn't support support syncronous writes,") + print(" but the main folder containing the index.sqlite3 and ArchiveBox.conf files must be on a filesystem that supports FSYNC.") + raise SystemExit(1) + + # retry the write without forcing FSYNC (aka atomic mode) + with open(path, mode=mode, encoding=encoding) as f: + if isinstance(contents, dict): + dump(contents, f, indent=4, sort_keys=True, cls=ExtendedEncoder) + elif isinstance(contents, (bytes, str)): + f.write(contents) + + # set permissions + os.chmod(path, int(permissions, base=8)) @enforce_types def chmod_file(path: str, cwd: str='.', permissions: str=OUTPUT_PERMISSIONS) -> None: