diff --git a/.dockerignore b/.dockerignore index d870390d..f4c91a47 100644 --- a/.dockerignore +++ b/.dockerignore @@ -16,6 +16,7 @@ venv/ .docker-venv/ node_modules/ +docs/ build/ dist/ brew_dist/ diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 9840f7ae..1cdddd6f 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -11,8 +11,7 @@ on: env: DOCKER_IMAGE: archivebox-ci - - + jobs: buildx: runs-on: ubuntu-latest @@ -60,13 +59,11 @@ jobs: uses: docker/metadata-action@v5 with: images: archivebox/archivebox,nikisweeting/archivebox - flavor: | - latest=auto tags: | type=ref,event=branch type=semver,pattern={{version}} type=semver,pattern={{major}}.{{minor}} - type=sha + type=raw,value=latest,enable={{is_default_branch}} - name: Build and push id: docker_build @@ -78,8 +75,18 @@ jobs: push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.docker_meta.outputs.tags }} cache-from: type=local,src=/tmp/.buildx-cache - cache-to: type=local,dest=/tmp/.buildx-cache + cache-to: type=local,dest=/tmp/.buildx-cache-new platforms: linux/amd64,linux/arm64,linux/arm/v7 - name: Image digest run: echo ${{ steps.docker_build.outputs.digest }} + + # This ugly bit is necessary if you don't want your cache to grow forever + # until it hits GitHub's limit of 5GB. 
+ # Temp fix + # https://github.com/docker/build-push-action/issues/252 + # https://github.com/moby/buildkit/issues/1896 + - name: Move cache + run: | + rm -rf /tmp/.buildx-cache + mv /tmp/.buildx-cache-new /tmp/.buildx-cache diff --git a/Dockerfile b/Dockerfile index 3a700784..b569a1a6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -73,7 +73,8 @@ COPY --chown=root:root --chmod=755 package.json "$CODE_DIR/" RUN grep '"version": ' "${CODE_DIR}/package.json" | awk -F'"' '{print $4}' > /VERSION.txt # Force apt to leave downloaded binaries in /var/cache/apt (massively speeds up Docker builds) -RUN rm -f /etc/apt/apt.conf.d/docker-clean; echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache +RUN echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache \ + && rm -f /etc/apt/apt.conf.d/docker-clean # Print debug info about build and save it to disk, for human eyes only, not used by anything else RUN (echo "[i] Docker build for ArchiveBox $(cat /VERSION.txt) starting..." \ @@ -123,7 +124,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.npm,sharing=locked,id=npm-$TARGETARCH$TARGETVARIANT \ echo "[+] Installing Node $NODE_VERSION environment in $NODE_MODULES..." 
\ && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_VERSION}.x nodistro main" >> /etc/apt/sources.list.d/nodejs.list \ - && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \ + && curl -fsSL "https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key" | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \ && apt-get update -qq \ && apt-get install -qq -y -t bookworm-backports --no-install-recommends \ nodejs libatomic1 python3-minimal \ @@ -202,7 +203,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T && chown -R $ARCHIVEBOX_USER "$PLAYWRIGHT_BROWSERS_PATH" \ # Save version info && ( \ - which chromium-browser && /usr/bin/chromium-browser --version \ + which chromium-browser && /usr/bin/chromium-browser --version || /usr/lib/chromium/chromium --version \ && echo -e '\n\n' \ ) | tee -a /VERSION.txt @@ -246,15 +247,15 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T COPY --chown=root:root --chmod=755 "." "$CODE_DIR/" RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \ echo "[*] Installing PIP ArchiveBox package from $CODE_DIR..." 
\ - && apt-get update -qq \ + # && apt-get update -qq \ # install C compiler to build deps on platforms that dont have 32-bit wheels available on pypi - && apt-get install -qq -y -t bookworm-backports --no-install-recommends \ - build-essential \ + # && apt-get install -qq -y -t bookworm-backports --no-install-recommends \ + # build-essential \ # INSTALL ARCHIVEBOX python package globally from CODE_DIR, with all optional dependencies && pip install -e "$CODE_DIR"[sonic,ldap] \ # save docker image size and always remove compilers / build tools after building is complete - && apt-get purge -y build-essential \ - && apt-get autoremove -y \ + # && apt-get purge -y build-essential \ + # && apt-get autoremove -y \ && rm -rf /var/lib/apt/lists/* #################################################### @@ -276,11 +277,10 @@ ENV IN_DOCKER=True # Print version for nice docker finish summary RUN (echo -e "\n\n[√] Finished Docker build succesfully. Saving build summary in: /VERSION.txt" \ - && echo -e "PLATFORM=${TARGETPLATFORM} ARCH=$(uname -m) ($(uname -s) ${TARGETARCH} ${TARGETVARIANT})" \ - && echo -e "BUILD_END_TIME=$(date +"%Y-%m-%d %H:%M:%S %s") TZ=${TZ}\n\n" \ - && "$CODE_DIR/bin/docker_entrypoint.sh" \ - archivebox version 2>&1 \ + && echo -e "PLATFORM=${TARGETPLATFORM} ARCH=$(uname -m) ($(uname -s) ${TARGETARCH} ${TARGETVARIANT})\n" \ + && echo -e "BUILD_END_TIME=$(date +"%Y-%m-%d %H:%M:%S %s")\n\n" \ ) | tee -a /VERSION.txt +RUN "$CODE_DIR"/bin/docker_entrypoint.sh version 2>&1 | tee -a /VERSION.txt #################################################### diff --git a/README.md b/README.md index 9a561a28..df8aa0b6 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@
- +

ArchiveBox
Open-source self-hosted web archiving.


@@ -33,9 +33,9 @@ curl -sSL 'https://get.archivebox.io' | sh # (or see pip/brew/Docker instruct **ArchiveBox is a powerful, self-hosted internet archiving solution to collect, save, and view websites offline.** -Without active preservation effort, everything on the internet eventually dissapears or degrades. Archive.org does a great job as a free central archive, but they require all archives to be public, and they cant save every type of content. +Without active preservation effort, everything on the internet eventually disappears or degrades. Archive.org does a great job as a free central archive, but they require all archives to be public, and they can't save every type of content. -*ArchiveBox is an open source tool that helps you archive web content on your own (or privately within an organization): save sharable copies of browser bookmarks, preserve evidence for legal cases, backup photos on FB / Insta / Flickr, download your media from YT / Soundcloud / etc., snapshot research papers in academic citations, and more...* +*ArchiveBox is an open source tool that helps you archive web content on your own (or privately within an organization): save copies of browser bookmarks, preserve evidence for legal cases, back up photos from FB / Insta / Flickr, download your media from YT / Soundcloud / etc., snapshot research papers & academic citations, and more...* > ➡️ *Use ArchiveBox as a [command-line package](#quickstart) and/or [self-hosted web app](#quickstart) on Linux, macOS, or in [Docker](#quickstart).* @@ -320,6 +320,10 @@ See the pip-archive
Arch pacman / FreeBSD pkg / Nix nix (Arch/FreeBSD/NixOS/more)
+ +> [!WARNING] +> *These are contributed by external volunteers and may lag behind the official `pip` channel.* +