diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 1cdddd6f..75c7658c 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -63,6 +63,7 @@ jobs: type=ref,event=branch type=semver,pattern={{version}} type=semver,pattern={{major}}.{{minor}} + type=sha type=raw,value=latest,enable={{is_default_branch}} - name: Build and push diff --git a/archivebox/config.py b/archivebox/config.py index 9389dbb4..234f1fc9 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -470,6 +470,8 @@ def can_upgrade(config): ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE +# These are derived/computed values calculated *after* all user-provided config values are ingested +# they appear in `archivebox config` output and are intended to be read-only for the user DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = { 'TERM_WIDTH': {'default': lambda c: lambda: shutil.get_terminal_size((100, 10)).columns}, 'USER': {'default': lambda c: SYSTEM_USER}, @@ -488,13 +490,13 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = { 'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (Path(c['CHROME_USER_DATA_DIR']).resolve() if c['CHROME_USER_DATA_DIR'] else None)}, # None means unset, so we autodetect it with find_chrome_Data_dir(), but emptystring '' means user manually set it to '', and we should store it as None 'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)}, 'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)}, - 'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')}, + 'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')}, # exec is always needed to list directories 'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0] or bin_path('archivebox')}, - 'VERSION': {'default': lambda c: get_version(c).split('+', 1)[0]}, - 'COMMIT_HASH': {'default': lambda c: get_commit_hash(c)}, - 'BUILD_TIME': {'default': lambda c: get_build_time(c)}, + 'VERSION': {'default': lambda c: get_version(c).split('+', 1)[0]}, # remove +editable from user-displayed version string + 'COMMIT_HASH': {'default': lambda c: get_commit_hash(c)}, # short git commit hash of codebase HEAD commit + 'BUILD_TIME': {'default': lambda c: get_build_time(c)}, # docker build completed time or python src last modified time 'VERSIONS_AVAILABLE': {'default': lambda c: get_versions_available_on_github(c)}, 'CAN_UPGRADE': {'default': lambda c: can_upgrade(c)}, @@ -583,47 +585,60 @@ def load_config_val(key: str, config_file_vars: Optional[Dict[str, str]]=None) -> ConfigValue: """parse bool, int, and str key=value pairs from env""" + assert isinstance(config, dict) + is_read_only = type is None + if is_read_only: + if callable(default): + return default(config) + return default + + # get value from environment variables or config files config_keys_to_check = (key, *(aliases or ())) + val = None for key in config_keys_to_check: if env_vars: val = env_vars.get(key) if val: break + if config_file_vars: val = config_file_vars.get(key) if val: break - if type is None or val is None: + is_unset = val is None + if is_unset: if callable(default): - assert isinstance(config, dict) return default(config) - return default - elif type is bool: - if val.lower() in ('true', 'yes', '1'): + # calculate value based on expected type + BOOL_TRUEIES = ('true', 'yes', '1') + BOOL_FALSEIES = ('false', 'no', '0') + + if type is bool: + if val.lower() in BOOL_TRUEIES: return True - elif val.lower() in ('false', 'no', '0'): + elif val.lower() in BOOL_FALSEIES: return False else: raise ValueError(f'Invalid configuration option {key}={val} (expected a boolean: True/False)') elif type is str: - if val.lower() in ('true', 'false', 'yes', 'no', '1', '0'): - raise ValueError(f'Invalid configuration option {key}={val} (expected a string)') + if val.lower() in (*BOOL_TRUEIES, *BOOL_FALSEIES): + raise ValueError(f'Invalid configuration option {key}={val} (expected a string, but value looks like a boolean)') return val.strip() elif type is int: - if not val.isdigit(): + if not val.strip().isdigit(): raise ValueError(f'Invalid configuration option {key}={val} (expected an integer)') - return int(val) + return int(val.strip()) elif type is list or type is dict: return json.loads(val) - raise Exception('Config values can only be str, bool, int or json') + raise Exception('Config values can only be str, bool, int, or json') def load_config_file(out_dir: str=None) -> Optional[Dict[str, str]]: diff --git a/bin/docker_entrypoint.sh b/bin/docker_entrypoint.sh index bccaa808..700105b5 100755 --- a/bin/docker_entrypoint.sh +++ b/bin/docker_entrypoint.sh @@ -33,9 +33,9 @@ export DEFAULT_PGID=911 # If user tires to set PUID and PGID to root values manually, catch and reject because root is not allowed if [[ "$PUID" == "0" ]] || [[ "$PGID" == "0" ]]; then echo -e "\n[X] Error: Got PUID=$PUID and PGID=$PGID but ArchiveBox is not allowed to be run as root, please change or unset PUID & PGID and try again." > /dev/stderr - echo -e " Hint: some NFS/SMB/FUSE/etc. filesystems force-remap all permissions, leave PUID/PGID blank" > /dev/stderr - echo -e " or set PUID/PGID to the same value as the user/group they remap to (e.g. $DEFAULT_PUID:$DEFAULT_PGID)." > /dev/stderr - echo -e " https://linux.die.net/man/8/mount.cifs#:~:text=does%20not%20provide%20unix%20ownership" > /dev/stderr + echo -e " Hint: some NFS/SMB/FUSE/etc. filesystems force-remap/ignore all permissions," > /dev/stderr + echo -e " leave PUID/PGID unset, or use values the filesystem prefers (defaults to $DEFAULT_PUID:$DEFAULT_PGID)" > /dev/stderr + echo -e " https://linux.die.net/man/8/mount.cifs#:~:text=does%20not%20provide%20unix%20ownership" > /dev/stderr exit 3 fi @@ -69,6 +69,9 @@ if [[ -d "$DATA_DIR/archive" ]]; then echo -e " \$ chown -R $PUID:$PGID ./data\n" > /dev/stderr echo -e " Configure the PUID & PGID environment variables to change the desired owner:" > /dev/stderr echo -e " https://docs.linuxserver.io/general/understanding-puid-and-pgid\n" > /dev/stderr + echo -e " Hint: some NFS/SMB/FUSE/etc. filesystems force-remap/ignore all permissions," > /dev/stderr + echo -e " leave PUID/PGID unset, or use values the filesystem prefers (defaults to $DEFAULT_PUID:$DEFAULT_PGID)" > /dev/stderr + echo -e " https://linux.die.net/man/8/mount.cifs#:~:text=does%20not%20provide%20unix%20ownership" > /dev/stderr exit 3 fi else @@ -89,7 +92,7 @@ chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"/* rm -Rf "$PLAYWRIGHT_BROWSERS_PATH/permissions_test_safe_to_delete" -# (this check is written in blood, QEMU silently breaks things in ways that are not obvious) +# (this check is written in blood in 2023, QEMU silently breaks things in ways that are not obvious) export IN_QEMU="$(pmap 1 | grep qemu >/dev/null && echo 'True' || echo 'False')" if [[ "$IN_QEMU" == "True" ]]; then echo -e "\n[!] Warning: Running $(uname -m) docker image using QEMU emulation, some things will break!" > /dev/stderr @@ -100,9 +103,9 @@ fi # Drop permissions to run commands as the archivebox user if [[ "$1" == /* || "$1" == "bash" || "$1" == "sh" || "$1" == "echo" || "$1" == "cat" || "$1" == "archivebox" ]]; then - # handle "docker run archivebox /some/non-archivebox/command --with=some args" by passing args directly to bash -c + # handle "docker run archivebox /bin/somecommand --with=some args" by passing args directly to bash -c # e.g. "docker run archivebox archivebox init: - # "docker run archivebox /venv/bin/archivebox-alt init" + # "docker run archivebox /venv/bin/ipython3" # "docker run archivebox /bin/bash -c '...'" # "docker run archivebox cat /VERSION.txt" exec gosu "$PUID" bash -c "$*"