Fork 0
mirror of synced 2024-06-01 18:20:20 +12:00
gnattu bc2bfc1cc7
fix: handle archive folder permission more graceful
On mounted network shares, chmod might be hard or impossible to do without modifying the server config. Ignore the archive folder because we already tested whether that folder is writable and printed warnings to the user.
2024-01-05 12:57:01 +08:00

159 lines
9.3 KiB
Executable file

# This Docker ENTRYPOINT script is called by `docker run archivebox ...` or `docker compose run archivebox ...`.
# It takes a CMD as $* shell arguments and runs it following these setup steps:
# - Set the archivebox user to use the correct PUID & PGID
# 1. highest precedence is for valid PUID and PGID env vars passed in explicitly
# 2. fall back to DETECTED_PUID of files found within existing data dir
# 3. fall back to DEFAULT_PUID if there is no data dir or it's owned by root
# - Create a new /data dir if necessary and set the correct ownership on it
# - Create a new /browsers dir if necessary and set the correct ownership on it
# - Check whether we're running inside QEMU emulation and show a warning if so.
# - Check that enough free space is available on / and /data
# - Drop down to archivebox user permissions and execute the passed CMD command.
# Shell options for this script ("unofficial bash strict mode", without nounset).
# Background reading:
#   http://redsymbol.net/articles/unofficial-bash-strict-mode/
#   https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html
# Currently commented out:
#   set -o xtrace
#   set -o nounset
#   IFS=$'\n'
# Enabled below:
#   -e           errexit:  abort on an unhandled non-zero exit status
#   -E           errtrace: ERR traps are inherited by functions and subshells
#   -o pipefail  a pipeline fails when any one of its stages fails
set -eE -o pipefail
# Global invariants (set by the Dockerfile at image build time; not intended
# to be customized by users at runtime).
# DATA_DIR keeps any non-empty value already present in the environment.
: "${DATA_DIR:=/data}"
# Default PUID and PGID, used when the data dir is empty and the user did not
# set an intended PUID+PGID manually.
DEFAULT_PUID=911
DEFAULT_PGID=911
export DATA_DIR DEFAULT_PUID DEFAULT_PGID
# If the user tries to set PUID to the root value manually, catch and reject it
# because running as root is not allowed.
# Globals:  PUID (checked), PGID / DEFAULT_PUID / DEFAULT_PGID (only echoed in the message)
# Outputs:  an explanatory error on stderr when PUID is "0"
# Exits:    terminates the script with status 3 when PUID is "0"; otherwise returns 0
reject_root_puid() {
    if [[ "${PUID:-}" != "0" ]]; then
        return 0
    fi
    {
        echo -e "\n[X] Error: Got PUID=$PUID and PGID=$PGID but ArchiveBox is not allowed to be run as root, please change or unset PUID & PGID and try again."
        echo -e " Hint: some NFS/SMB/FUSE/etc. filesystems force-remap/ignore all permissions,"
        echo -e " leave PUID/PGID unset, or use values the filesystem prefers (defaults to $DEFAULT_PUID:$DEFAULT_PGID)"
        echo -e " https://linux.die.net/man/8/mount.cifs#:~:text=does%20not%20provide%20unix%20ownership"
    } >&2
    exit 3
}
reject_root_puid
# If the data directory already exists, autodetect its owner by looking at a file within it.
# Globals:  DATA_DIR, DEFAULT_PUID, DEFAULT_PGID (read)
#           DETECTED_PUID, DETECTED_PGID (written and exported)
# The owner is read from $DATA_DIR/logs/errors.log; when that file cannot be
# stat'ed the defaults are used instead.
detect_data_dir_owner() {
    local probe_file="$DATA_DIR/logs/errors.log"
    DETECTED_PUID="$(stat -c '%u' "$probe_file" 2>/dev/null || echo "$DEFAULT_PUID")"
    DETECTED_PGID="$(stat -c '%g' "$probe_file" 2>/dev/null || echo "$DEFAULT_PGID")"
    # If the data directory exists but is owned by root, use the default instead of
    # root because root is not allowed. Only the uid is replaced, mirroring the
    # explicit PUID=0 rejection, which also checks the uid only.
    if [[ "$DETECTED_PUID" == "0" ]]; then
        DETECTED_PUID="$DEFAULT_PUID"
    fi
    export DETECTED_PUID DETECTED_PGID
}
detect_data_dir_owner

# Set archivebox user and group ids to desired PUID/PGID
# (explicit PUID/PGID env vars win over the detected values).
# The same account name is used for usermod/groupmod and for the id lookups below;
# it falls back to "archivebox" when ARCHIVEBOX_USER is not provided.
abx_user="${ARCHIVEBOX_USER:-archivebox}"
usermod -o -u "${PUID:-$DETECTED_PUID}" "$abx_user" > /dev/null 2>&1
groupmod -o -g "${PGID:-$DETECTED_PGID}" "$abx_user" > /dev/null 2>&1

# re-set PUID and PGID to values reported by system instead of values we tried to set,
# in case wonky filesystems or Docker setups try to play UID/GID remapping tricks on us.
# Assignment is kept separate from export so a failing `id` is not masked by export's status.
PUID="$(id -u "$abx_user")"
PGID="$(id -g "$abx_user")"
export PUID PGID
# Check the permissions of the data dir (or create it if it doesn't exist).
# Globals:  DATA_DIR (read); PUID, PGID, DEFAULT_PUID, DEFAULT_PGID (echoed in the error text)
# Behavior:
#   - $DATA_DIR/archive missing: create $DATA_DIR/logs and return
#   - archive dir present and a probe file can be created in it: remove the probe and return
#   - probe file cannot be created: print instructions on stderr and exit 3
check_data_dir_writable() {
    local archive_dir="$DATA_DIR/archive"
    local probe_file="$archive_dir/.permissions_test_safe_to_delete"

    if [[ ! -d "$archive_dir" ]]; then
        # create data directory
        mkdir -p "$DATA_DIR/logs"
        return 0
    fi

    if touch "$probe_file" 2>/dev/null; then
        # It's fine, we are able to write to the data directory (as root inside the container)
        rm -f "$probe_file"
        # echo "[√] Permissions are correct"
        return 0
    fi

    # the only time this fails is if the host filesystem doesn't allow us to write as root
    # (e.g. some NFS mapall/maproot problems, connection issues, drive disappeared, etc.)
    {
        echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data/archive dir (currently owned by $(stat -c '%u' "$archive_dir"):$(stat -c '%g' "$archive_dir"))."
        echo -e " Change ./data to be owned by PUID=$PUID PGID=$PGID on the host and retry:"
        echo -e " \$ chown -R $PUID:$PGID ./data/archive\n"
        echo -e " Configure the PUID & PGID environment variables to change the desired owner:"
        echo -e " https://docs.linuxserver.io/general/understanding-puid-and-pgid\n"
        echo -e " Hint: some NFS/SMB/FUSE/etc. filesystems force-remap/ignore all permissions,"
        echo -e " leave PUID/PGID unset, or use values the filesystem prefers (defaults to $DEFAULT_PUID:$DEFAULT_PGID)"
        echo -e " https://linux.die.net/man/8/mount.cifs#:~:text=does%20not%20provide%20unix%20ownership"
    } >&2
    exit 3
}
check_data_dir_writable
# Force-set the ownership of the data dir contents to the archivebox user and group.
# This is needed because Docker Desktop often does not map user permissions from the host properly.
# Globals:  DATA_DIR, PUID, PGID, PLAYWRIGHT_BROWSERS_PATH (read)
fix_ownership() {
    local owner="$PUID:$PGID"
    # Never operate relative to "/" when the variable is unset; /browsers is the
    # location named in this script's header comment.
    local browsers_dir="${PLAYWRIGHT_BROWSERS_PATH:-/browsers}"

    # Single traversal:
    #   - $DATA_DIR/archive itself is pruned, so the archive tree is neither walked nor chowned
    #   - every other directory whose path does not match "$DATA_DIR/archive*" is chowned
    #   - every regular file that is reached (i.e. not below $DATA_DIR/archive/) is chowned
    # "-exec ... \;" is kept on purpose: with the "+" form a failing chown would make
    # find itself return non-zero, which aborts the script under errexit.
    find "$DATA_DIR" \
        -type d -path "$DATA_DIR/archive" -prune \
        -o \( -type d -not -path "$DATA_DIR/archive*" -o -type f \) \
        -exec chown "$owner" {} \;

    # also chown BROWSERS_DIR because otherwise 'archivebox setup' wont be able to install chrome at runtime.
    # The throw-away sub-directory makes sure the browsers dir exists and that the
    # glob below matches at least one entry.
    mkdir -p "$browsers_dir/permissions_test_safe_to_delete"
    # Best effort: a failure is reported but does not abort startup.
    chown "$owner" "$browsers_dir" "$browsers_dir"/* \
        || echo -e "[!] Warning: could not change ownership of $browsers_dir to $owner" >&2
    rm -Rf "$browsers_dir/permissions_test_safe_to_delete"
}
fix_ownership
# (this check is written in blood in 2023, QEMU silently breaks things in ways that are not obvious)
# Prints 'True' when the `pmap 1` output (memory map of PID 1) mentions "qemu",
# 'False' otherwise (including when pmap fails or prints nothing).
detect_qemu() {
    local init_maps
    # Capture the full output first instead of piping into grep: no pipeline stage
    # can then be cut short, and a pmap failure simply yields an empty string.
    init_maps="$(pmap 1)" || true
    if [[ "$init_maps" == *qemu* ]]; then
        echo 'True'
    else
        echo 'False'
    fi
}
IN_QEMU="$(detect_qemu)"
export IN_QEMU
if [[ "$IN_QEMU" == "True" ]]; then
    {
        echo -e "\n[!] Warning: Running $(uname -m) docker image using QEMU emulation, some things will break!"
        echo -e " chromium (screenshot, pdf, dom), singlefile, and any dependencies that rely on inotify will not run in QEMU."
        echo -e " See here for more info: https://github.com/microsoft/playwright/issues/17395#issuecomment-1250830493\n"
    } >&2
fi
# check disk space free on / and on the data dir, warn on <500Mb free, error on <100Mb free

# Classify a filesystem by its free space.
# Arguments: $1 - available space in KB
#            $2 - used percentage as an integer (no % sign)
# Outputs:   one of: critical (<100000 KB available)
#                    low      (>=99% used, or <500000 KB available)
#                    ok
#                    unknown  (either argument is not a plain non-negative integer,
#                              e.g. df failed or printed only its header)
disk_space_level() {
    local avail_kb=$1 used_pct=$2
    if ! [[ "$avail_kb" =~ ^[0-9]+$ && "$used_pct" =~ ^[0-9]+$ ]]; then
        echo 'unknown'
    elif (( 10#$avail_kb < 100000 )); then
        echo 'critical'
    elif (( 10#$used_pct >= 99 || 10#$avail_kb < 500000 )); then
        echo 'low'
    else
        echo 'ok'
    fi
}

# Inspect / and the data dir with df and report low-space conditions on stderr.
# Globals:  DATA_DIR (read, defaults to /data)
#           ROOT_USAGE, ROOT_USED_PCT, ROOT_AVAIL_KB,
#           DATA_USAGE, DATA_USED_PCT, DATA_AVAIL_KB (written and exported)
# Exits:    status 3 when / has less than 100000 KB available.
#           A critically full data volume only warns and pauses for 5 seconds.
# When df output cannot be parsed the corresponding check is skipped.
check_disk_space() {
    local data_dir="${DATA_DIR:-/data}"

    ROOT_USAGE="$(df --output=pcent,avail / | tail -n 1 | xargs)" || ROOT_USAGE=""
    ROOT_USED_PCT="${ROOT_USAGE%%%*}"
    ROOT_AVAIL_KB="$(awk '{print $2}' <<<"$ROOT_USAGE")"
    export ROOT_USAGE ROOT_USED_PCT ROOT_AVAIL_KB
    case "$(disk_space_level "$ROOT_AVAIL_KB" "$ROOT_USED_PCT")" in
        critical)
            {
                echo -e "\n[!] Warning: Docker root filesystem is completely out of space! (${ROOT_USED_PCT}% used on /)"
                echo -e " you need to free up at least 100Mb in your Docker VM to continue:"
                echo -e " \$ docker system prune\n"
                df -kh /
            } >&2
            exit 3
            ;;
        low)
            {
                echo -e "\n[!] Warning: Docker root filesystem is running out of space! (${ROOT_USED_PCT}% used on /)"
                echo -e " you may need to free up space in your Docker VM soon:"
                echo -e " \$ docker system prune\n"
                df -kh /
            } >&2
            ;;
    esac

    DATA_USAGE="$(df --output=pcent,avail "$data_dir" | tail -n 1 | xargs)" || DATA_USAGE=""
    DATA_USED_PCT="${DATA_USAGE%%%*}"
    DATA_AVAIL_KB="$(awk '{print $2}' <<<"$DATA_USAGE")"
    export DATA_USAGE DATA_USED_PCT DATA_AVAIL_KB
    # The data volume is judged by its own numbers only (DATA_*), never by ROOT_*.
    case "$(disk_space_level "$DATA_AVAIL_KB" "$DATA_USED_PCT")" in
        critical)
            {
                echo -e "\n[!] Warning: Docker data volume is completely out of space! (${DATA_USED_PCT}% used on $data_dir)"
                echo -e " you need to free up at least 100Mb on the drive holding your data directory"
                echo -e " \$ ncdu -x data\n"
                df -kh "$data_dir"
            } >&2
            sleep 5
            ;;
        low)
            {
                echo -e "\n[!] Warning: Docker data volume is running out of space! (${DATA_USED_PCT}% used on $data_dir)"
                echo -e " you may need to free up space on the drive holding your data directory soon"
                echo -e " \$ ncdu -x data\n"
                df -kh "$data_dir"
            } >&2
            ;;
    esac
    return 0
}
check_disk_space
# Resolve the archivebox executable once. A failed lookup is tolerated here
# (empty value) so the pass-through commands below still work without it.
ARCHIVEBOX_BIN_PATH="$(command -v archivebox || true)"
export ARCHIVEBOX_BIN_PATH

# Drop permissions to run commands as the archivebox user
case "$1" in
    /* | bash | sh | echo | cat | whoami | archivebox)
        # handle "docker run archivebox /bin/somecommand --with=some args" by passing args directly to bash -c
        # e.g. "docker run archivebox archivebox init"
        #      "docker run archivebox /venv/bin/ipython3"
        #      "docker run archivebox /bin/bash -c '...'"
        #      "docker run archivebox cat /VERSION.txt"
        # printf requotes shell parameters properly https://stackoverflow.com/a/39463371/2156113
        # gosu spawns an ephemeral bash process owned by archivebox user (bash wrapper is needed to load env vars, PATH, and setup terminal TTY)
        # outermost exec hands over current process ID to inner bash process, inner exec hands over inner bash PID to user's command
        exec gosu "$PUID" /bin/bash -c "exec $(printf ' %q' "$@")"
        ;;
    *)
        # handle "docker run archivebox add some subcommand --with=args abc" by calling archivebox to run the args as a CLI subcommand
        # e.g. "docker run archivebox help"
        #      "docker run archivebox add --depth=1 https://example.com"
        #      "docker run archivebox manage createsuperuser"
        #      "docker run archivebox server"
        exec gosu "$PUID" "$ARCHIVEBOX_BIN_PATH" "$@"
        ;;
esac