Compare commits
No commits in common. "dev" and "v0.7.2" have entirely different histories.
|
@ -17,11 +17,6 @@ venv/
|
|||
.venv-old/
|
||||
.docker-venv/
|
||||
node_modules/
|
||||
chrome/
|
||||
chromeprofile/
|
||||
|
||||
pdm.dev.lock
|
||||
pdm.lock
|
||||
|
||||
docs/
|
||||
build/
|
||||
|
@ -33,7 +28,4 @@ assets/
|
|||
docker/
|
||||
|
||||
data/
|
||||
data*/
|
||||
output/
|
||||
index.sqlite3
|
||||
index.sqlite3-wal
|
||||
|
|
2
.gitattributes
vendored
2
.gitattributes
vendored
|
@ -1,2 +0,0 @@
|
|||
**/*.lock
|
||||
**/*-lock.json
|
5
.github/FUNDING.yml
vendored
5
.github/FUNDING.yml
vendored
|
@ -1,2 +1,3 @@
|
|||
github: ["ArchiveBox", "pirate"]
|
||||
custom: ["https://donate.archivebox.io", "https://paypal.me/NicholasSweeting"]
|
||||
github: pirate
|
||||
patreon: theSquashSH
|
||||
custom: ["https://twitter.com/ArchiveBoxApp", "https://paypal.me/NicholasSweeting", "https://www.blockchain.com/eth/address/0x5D4c34D4a121Fe08d1dDB7969F07550f2dB9f471", "https://www.blockchain.com/btc/address/1HuxXriPE2Bbnag3jJrqa3bkNHrs297dYH"]
|
||||
|
|
|
@ -6,7 +6,6 @@ labels: ''
|
|||
assignees: ''
|
||||
|
||||
---
|
||||
<!-- If you perfer, you can make a PR to https://github.com/ArchiveBox/docs instead of opening an issue -->
|
||||
|
||||
## Wiki Page URL
|
||||
<!-- e.g. https://github.com/pirate/ArchiveBox/wiki/Configuration#use_color -->
|
||||
|
|
25
.github/dependabot.yml
vendored
25
.github/dependabot.yml
vendored
|
@ -1,25 +0,0 @@
|
|||
# To get started with Dependabot version updates, you'll need to specify which
|
||||
# package ecosystems to update and where the package manifests are located.
|
||||
# Please see the documentation for all configuration options:
|
||||
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/"
|
||||
target-branch: "dev"
|
||||
schedule:
|
||||
interval: "monthly"
|
||||
groups:
|
||||
pip:
|
||||
patterns:
|
||||
- "*"
|
||||
- package-ecosystem: "npm"
|
||||
directory: "/"
|
||||
target-branch: "dev"
|
||||
schedule:
|
||||
interval: "monthly"
|
||||
groups:
|
||||
npm:
|
||||
patterns:
|
||||
- "*"
|
32
.github/workflows/codeql-analysis.yml
vendored
Normal file
32
.github/workflows/codeql-analysis.yml
vendored
Normal file
|
@ -0,0 +1,32 @@
|
|||
name: "CodeQL"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ dev ]
|
||||
pull_request:
|
||||
branches: [ dev ]
|
||||
schedule:
|
||||
- cron: '43 1 * * 2'
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
language: [ 'python' ]
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v1
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
queries: security-extended
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v1
|
92
.github/workflows/codeql.yml
vendored
92
.github/workflows/codeql.yml
vendored
|
@ -1,92 +0,0 @@
|
|||
# For most projects, this workflow file will not need changing; you simply need
|
||||
# to commit it to your repository.
|
||||
#
|
||||
# You may wish to alter this file to override the set of languages analyzed,
|
||||
# or to provide custom queries or build logic.
|
||||
#
|
||||
# ******** NOTE ********
|
||||
# We have attempted to detect the languages in your repository. Please check
|
||||
# the `language` matrix defined below to confirm you have the correct set of
|
||||
# supported CodeQL languages.
|
||||
#
|
||||
name: "CodeQL"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ "dev" ]
|
||||
pull_request:
|
||||
branches: [ "dev" ]
|
||||
schedule:
|
||||
- cron: '33 17 * * 6'
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze (${{ matrix.language }})
|
||||
# Runner size impacts CodeQL analysis time. To learn more, please see:
|
||||
# - https://gh.io/recommended-hardware-resources-for-running-codeql
|
||||
# - https://gh.io/supported-runners-and-hardware-resources
|
||||
# - https://gh.io/using-larger-runners (GitHub.com only)
|
||||
# Consider using larger runners or machines with greater resources for possible analysis time improvements.
|
||||
runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
|
||||
timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
|
||||
permissions:
|
||||
# required for all workflows
|
||||
security-events: write
|
||||
|
||||
# required to fetch internal or private CodeQL packs
|
||||
packages: read
|
||||
|
||||
# only required for workflows in private repositories
|
||||
actions: read
|
||||
contents: read
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- language: python
|
||||
build-mode: none
|
||||
# CodeQL supports the following values keywords for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift'
|
||||
# Use `c-cpp` to analyze code written in C, C++ or both
|
||||
# Use 'java-kotlin' to analyze code written in Java, Kotlin or both
|
||||
# Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
|
||||
# To learn more about changing the languages that are analyzed or customizing the build mode for your analysis,
|
||||
# see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning.
|
||||
# If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how
|
||||
# your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v3
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
build-mode: ${{ matrix.build-mode }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
# By default, queries listed here will override any specified in a config file.
|
||||
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||
|
||||
# For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
|
||||
# queries: security-extended,security-and-quality
|
||||
|
||||
# If the analyze step fails for one of the languages you are analyzing with
|
||||
# "We were unable to automatically build your code", modify the matrix above
|
||||
# to set the build mode to "manual" for that language. Then modify this step
|
||||
# to build your code.
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
|
||||
- if: matrix.build-mode == 'manual'
|
||||
run: |
|
||||
echo 'If you are using a "manual" build mode for one or more of the' \
|
||||
'languages you are analyzing, replace this with the commands to build' \
|
||||
'your code, for example:'
|
||||
echo ' make bootstrap'
|
||||
echo ' make release'
|
||||
exit 1
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v3
|
||||
with:
|
||||
category: "/language:${{matrix.language}}"
|
23
.github/workflows/docker.yml
vendored
23
.github/workflows/docker.yml
vendored
|
@ -31,7 +31,7 @@ jobs:
|
|||
with:
|
||||
version: latest
|
||||
install: true
|
||||
platforms: linux/amd64,linux/arm64
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v7
|
||||
|
||||
- name: Builder instance name
|
||||
run: echo ${{ steps.buildx.outputs.name }}
|
||||
|
@ -51,26 +51,20 @@ jobs:
|
|||
uses: docker/login-action@v3
|
||||
if: github.event_name != 'pull_request'
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||
|
||||
- name: Collect Docker tags
|
||||
# https://github.com/docker/metadata-action
|
||||
id: docker_meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: archivebox/archivebox,nikisweeting/archivebox
|
||||
tags: |
|
||||
# :stable
|
||||
type=ref,event=branch
|
||||
# :0.7.3
|
||||
type=semver,pattern={{version}}
|
||||
# :0.7
|
||||
type=semver,pattern={{major}}.{{minor}}
|
||||
# :sha-463ea54
|
||||
type=sha
|
||||
# :latest
|
||||
type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'stable') }}
|
||||
type=raw,value=latest,enable={{is_default_branch}}
|
||||
|
||||
- name: Build and push
|
||||
id: docker_build
|
||||
|
@ -83,18 +77,11 @@ jobs:
|
|||
tags: ${{ steps.docker_meta.outputs.tags }}
|
||||
cache-from: type=local,src=/tmp/.buildx-cache
|
||||
cache-to: type=local,dest=/tmp/.buildx-cache-new
|
||||
platforms: linux/amd64,linux/arm64
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v7
|
||||
|
||||
- name: Image digest
|
||||
run: echo ${{ steps.docker_build.outputs.digest }}
|
||||
|
||||
- name: Update README
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||
repository: archivebox/archivebox
|
||||
|
||||
# This ugly bit is necessary if you don't want your cache to grow forever
|
||||
# until it hits GitHub's limit of 5GB.
|
||||
# Temp fix
|
||||
|
|
2
.github/workflows/pip.yml
vendored
2
.github/workflows/pip.yml
vendored
|
@ -35,7 +35,7 @@ jobs:
|
|||
cache: true
|
||||
|
||||
- name: Install dependencies
|
||||
run: pdm install --fail-fast --no-lock --dev --group=':all' --no-self
|
||||
run: pdm install --fail-fast --no-lock --group :all --no-self
|
||||
|
||||
- name: Build package
|
||||
run: |
|
||||
|
|
13
.gitignore
vendored
13
.gitignore
vendored
|
@ -12,11 +12,6 @@ venv/
|
|||
.docker-venv/
|
||||
node_modules/
|
||||
|
||||
# Ignore dev lockfiles (should always be built fresh)
|
||||
pdm.lock
|
||||
pdm.dev.lock
|
||||
requirements-dev.txt
|
||||
|
||||
# Packaging artifacts
|
||||
.pdm-python
|
||||
.pdm-build
|
||||
|
@ -27,12 +22,10 @@ dist/
|
|||
|
||||
# Data folders
|
||||
data/
|
||||
data*/
|
||||
data1/
|
||||
data2/
|
||||
data3/
|
||||
output/
|
||||
index.sqlite3
|
||||
*.sqlite*
|
||||
data.*
|
||||
|
||||
# vim
|
||||
*.sw?
|
||||
.vscode
|
||||
|
|
|
@ -30,4 +30,5 @@ formats:
|
|||
# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
|
||||
python:
|
||||
install:
|
||||
- requirements: requirements.txt
|
||||
- requirements: docs/requirements.txt
|
121
Dockerfile
121
Dockerfile
|
@ -10,7 +10,7 @@
|
|||
# docker run -v "$PWD/data":/data -p 8000:8000 archivebox server
|
||||
# Multi-arch build:
|
||||
# docker buildx create --use
|
||||
# docker buildx build . --platform=linux/amd64,linux/arm64--push -t archivebox/archivebox:latest -t archivebox/archivebox:dev
|
||||
# docker buildx build . --platform=linux/amd64,linux/arm64,linux/arm/v7 --push -t archivebox/archivebox:latest -t archivebox/archivebox:dev
|
||||
#
|
||||
# Read more about [developing Archivebox](https://github.com/ArchiveBox/ArchiveBox#archivebox-development).
|
||||
|
||||
|
@ -20,23 +20,9 @@ FROM python:3.11-slim-bookworm
|
|||
|
||||
LABEL name="archivebox" \
|
||||
maintainer="Nick Sweeting <dockerfile@archivebox.io>" \
|
||||
description="All-in-one self-hosted internet archiving solution" \
|
||||
description="All-in-one personal internet archiving container" \
|
||||
homepage="https://github.com/ArchiveBox/ArchiveBox" \
|
||||
documentation="https://github.com/ArchiveBox/ArchiveBox/wiki/Docker" \
|
||||
org.opencontainers.image.title="ArchiveBox" \
|
||||
org.opencontainers.image.vendor="ArchiveBox" \
|
||||
org.opencontainers.image.description="All-in-one self-hosted internet archiving solution" \
|
||||
org.opencontainers.image.source="https://github.com/ArchiveBox/ArchiveBox" \
|
||||
com.docker.image.source.entrypoint="Dockerfile" \
|
||||
# TODO: release ArchiveBox as a Docker Desktop extension (requires these labels):
|
||||
# https://docs.docker.com/desktop/extensions-sdk/architecture/metadata/
|
||||
com.docker.desktop.extension.api.version=">= 1.4.7" \
|
||||
com.docker.desktop.extension.icon="https://archivebox.io/icon.png" \
|
||||
com.docker.extension.publisher-url="https://archivebox.io" \
|
||||
com.docker.extension.screenshots='[{"alt": "Screenshot of Admin UI", "url": "https://github.com/ArchiveBox/ArchiveBox/assets/511499/e8e0b6f8-8fdf-4b7f-8124-c10d8699bdb2"}]' \
|
||||
com.docker.extension.detailed-description='See here for detailed documentation: https://wiki.archivebox.io' \
|
||||
com.docker.extension.changelog='See here for release notes: https://github.com/ArchiveBox/ArchiveBox/releases' \
|
||||
com.docker.extension.categories='database,utility-tools'
|
||||
documentation="https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker"
|
||||
|
||||
ARG TARGETPLATFORM
|
||||
ARG TARGETOS
|
||||
|
@ -87,9 +73,7 @@ COPY --chown=root:root --chmod=755 package.json "$CODE_DIR/"
|
|||
RUN grep '"version": ' "${CODE_DIR}/package.json" | awk -F'"' '{print $4}' > /VERSION.txt
|
||||
|
||||
# Force apt to leave downloaded binaries in /var/cache/apt (massively speeds up Docker builds)
|
||||
RUN echo 'Binary::apt::APT::Keep-Downloaded-Packages "1";' > /etc/apt/apt.conf.d/99keep-cache \
|
||||
&& echo 'APT::Install-Recommends "0";' > /etc/apt/apt.conf.d/99no-intall-recommends \
|
||||
&& echo 'APT::Install-Suggests "0";' > /etc/apt/apt.conf.d/99no-intall-suggests \
|
||||
RUN echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache \
|
||||
&& rm -f /etc/apt/apt.conf.d/docker-clean
|
||||
|
||||
# Print debug info about build and save it to disk, for human eyes only, not used by anything else
|
||||
|
@ -122,10 +106,10 @@ RUN echo "[*] Setting up $ARCHIVEBOX_USER user uid=${DEFAULT_PUID}..." \
|
|||
# Install system apt dependencies (adding backports to access more recent apt updates)
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT \
|
||||
echo "[+] Installing APT base system dependencies for $TARGETPLATFORM..." \
|
||||
&& echo 'deb https://deb.debian.org/debian bookworm-backports main contrib non-free' > /etc/apt/sources.list.d/backports.list \
|
||||
&& echo 'deb https://deb.debian.org/debian bookworm-backports main contrib non-free' >> /etc/apt/sources.list.d/backports.list \
|
||||
&& mkdir -p /etc/apt/keyrings \
|
||||
&& apt-get update -qq \
|
||||
&& apt-get install -qq -y -t bookworm-backports \
|
||||
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
|
||||
# 1. packaging dependencies
|
||||
apt-transport-https ca-certificates apt-utils gnupg2 curl wget \
|
||||
# 2. docker and init system dependencies
|
||||
|
@ -136,13 +120,27 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
|
|||
|
||||
######### Language Environments ####################################
|
||||
|
||||
# Install Node environment
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.npm,sharing=locked,id=npm-$TARGETARCH$TARGETVARIANT \
|
||||
echo "[+] Installing Node $NODE_VERSION environment in $NODE_MODULES..." \
|
||||
&& echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_VERSION}.x nodistro main" >> /etc/apt/sources.list.d/nodejs.list \
|
||||
&& curl -fsSL "https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key" | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
|
||||
&& apt-get update -qq \
|
||||
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
|
||||
nodejs libatomic1 python3-minimal \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
# Update NPM to latest version
|
||||
&& npm i -g npm --cache /root/.npm \
|
||||
# Save version info
|
||||
&& ( \
|
||||
which node && node --version \
|
||||
&& which npm && npm --version \
|
||||
&& echo -e '\n\n' \
|
||||
) | tee -a /VERSION.txt
|
||||
|
||||
# Install Python environment
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
|
||||
echo "[+] Setting up Python $PYTHON_VERSION runtime..." \
|
||||
# && apt-get update -qq \
|
||||
# && apt-get install -qq -y -t bookworm-backports --no-upgrade \
|
||||
# python${PYTHON_VERSION} python${PYTHON_VERSION}-minimal python3-pip \
|
||||
# && rm -rf /var/lib/apt/lists/* \
|
||||
# tell PDM to allow using global system python site packages
|
||||
# && rm /usr/lib/python3*/EXTERNALLY-MANAGED \
|
||||
# create global virtual environment GLOBAL_VENV to use (better than using pip install --global)
|
||||
|
@ -159,37 +157,17 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
|
|||
&& echo -e '\n\n' \
|
||||
) | tee -a /VERSION.txt
|
||||
|
||||
|
||||
# Install Node environment
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.npm,sharing=locked,id=npm-$TARGETARCH$TARGETVARIANT \
|
||||
echo "[+] Installing Node $NODE_VERSION environment in $NODE_MODULES..." \
|
||||
&& echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_VERSION}.x nodistro main" >> /etc/apt/sources.list.d/nodejs.list \
|
||||
&& curl -fsSL "https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key" | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
|
||||
&& apt-get update -qq \
|
||||
&& apt-get install -qq -y -t bookworm-backports --no-upgrade libatomic1 \
|
||||
&& apt-get install -y -t bookworm-backports --no-upgrade \
|
||||
nodejs \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
# Update NPM to latest version
|
||||
&& npm i -g npm --cache /root/.npm \
|
||||
# Save version info
|
||||
&& ( \
|
||||
which node && node --version \
|
||||
&& which npm && npm --version \
|
||||
&& echo -e '\n\n' \
|
||||
) | tee -a /VERSION.txt
|
||||
|
||||
|
||||
######### Extractor Dependencies ##################################
|
||||
|
||||
# Install apt dependencies
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
|
||||
echo "[+] Installing APT extractor dependencies globally using apt..." \
|
||||
&& apt-get update -qq \
|
||||
&& apt-get install -qq -y -t bookworm-backports \
|
||||
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
|
||||
curl wget git yt-dlp ffmpeg ripgrep \
|
||||
# Packages we have also needed in the past:
|
||||
# youtube-dl wget2 aria2 python3-pyxattr rtmpdump libfribidi-bin mpv \
|
||||
# fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
# Save version info
|
||||
&& ( \
|
||||
|
@ -205,21 +183,18 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
|
|||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/ms-playwright,sharing=locked,id=browsers-$TARGETARCH$TARGETVARIANT \
|
||||
echo "[+] Installing Browser binary dependencies to $PLAYWRIGHT_BROWSERS_PATH..." \
|
||||
&& apt-get update -qq \
|
||||
&& apt-get install -qq -y -t bookworm-backports \
|
||||
fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-khmeros fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
|
||||
at-spi2-common fonts-liberation fonts-noto-color-emoji fonts-tlwg-loma-otf fonts-unifont libatk-bridge2.0-0 libatk1.0-0 libatspi2.0-0 libavahi-client3 \
|
||||
libavahi-common-data libavahi-common3 libcups2 libfontenc1 libice6 libnspr4 libnss3 libsm6 libunwind8 \
|
||||
libxaw7 libxcomposite1 libxdamage1 libxfont2 \
|
||||
libxkbfile1 libxmu6 libxpm4 libxt6 x11-xkb-utils xfonts-encodings \
|
||||
# xfonts-scalable xfonts-utils xserver-common xvfb \
|
||||
# chrome can run without dbus/upower technically, it complains about missing dbus but should run ok anyway
|
||||
# libxss1 dbus dbus-x11 upower \
|
||||
# && service dbus start \
|
||||
# install Chromium using playwright
|
||||
&& pip install playwright \
|
||||
&& cp -r /root/.cache/ms-playwright "$PLAYWRIGHT_BROWSERS_PATH" \
|
||||
&& playwright install chromium \
|
||||
&& export CHROME_BINARY="$(python -c 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)')" \
|
||||
&& if [[ "$TARGETPLATFORM" == *amd64* || "$TARGETPLATFORM" == *arm64* ]]; then \
|
||||
# install Chromium using playwright
|
||||
pip install playwright \
|
||||
&& cp -r /root/.cache/ms-playwright "$PLAYWRIGHT_BROWSERS_PATH" \
|
||||
&& playwright install --with-deps chromium \
|
||||
&& export CHROME_BINARY="$(python -c 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)')"; \
|
||||
else \
|
||||
# fall back to installing Chromium via apt-get on platforms not supported by playwright (e.g. risc, ARMv7, etc.)
|
||||
apt-get install -qq -y -t bookworm-backports --no-install-recommends \
|
||||
chromium fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
|
||||
&& export CHROME_BINARY="$(which chromium)"; \
|
||||
fi \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& ln -s "$CHROME_BINARY" /usr/bin/chromium-browser \
|
||||
&& mkdir -p "/home/${ARCHIVEBOX_USER}/.config/chromium/Crash Reports/pending/" \
|
||||
|
@ -252,7 +227,7 @@ COPY --chown=root:root --chmod=755 "./pyproject.toml" "requirements.txt" "$CODE_
|
|||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
|
||||
echo "[+] Installing PIP ArchiveBox dependencies from requirements.txt for ${TARGETPLATFORM}..." \
|
||||
&& apt-get update -qq \
|
||||
&& apt-get install -qq -y -t bookworm-backports \
|
||||
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
|
||||
build-essential \
|
||||
libssl-dev libldap2-dev libsasl2-dev \
|
||||
python3-ldap python3-msgpack python3-mutagen python3-regex python3-pycryptodome procps \
|
||||
|
@ -274,8 +249,8 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
|
|||
echo "[*] Installing PIP ArchiveBox package from $CODE_DIR..." \
|
||||
# && apt-get update -qq \
|
||||
# install C compiler to build deps on platforms that dont have 32-bit wheels available on pypi
|
||||
# && apt-get install -qq -y -t bookworm-backports \
|
||||
# build-essential \
|
||||
# && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
|
||||
# build-essential \
|
||||
# INSTALL ARCHIVEBOX python package globally from CODE_DIR, with all optional dependencies
|
||||
&& pip install -e "$CODE_DIR"[sonic,ldap] \
|
||||
# save docker image size and always remove compilers / build tools after building is complete
|
||||
|
@ -287,14 +262,9 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
|
|||
|
||||
# Setup ArchiveBox runtime config
|
||||
WORKDIR "$DATA_DIR"
|
||||
ENV IN_DOCKER=True \
|
||||
DISPLAY=novnc:0.0 \
|
||||
CUSTOM_TEMPLATES_DIR=/data/templates \
|
||||
GOOGLE_API_KEY=no \
|
||||
GOOGLE_DEFAULT_CLIENT_ID=no \
|
||||
GOOGLE_DEFAULT_CLIENT_SECRET=no \
|
||||
ALLOWED_HOSTS=*
|
||||
ENV IN_DOCKER=True
|
||||
## No need to set explicitly, these values will be autodetected by archivebox in docker:
|
||||
# CHROME_SANDBOX=False \
|
||||
# WGET_BINARY="wget" \
|
||||
# YOUTUBEDL_BINARY="yt-dlp" \
|
||||
# CHROME_BINARY="/usr/bin/chromium-browser" \
|
||||
|
@ -319,8 +289,9 @@ WORKDIR "$DATA_DIR"
|
|||
VOLUME "$DATA_DIR"
|
||||
EXPOSE 8000
|
||||
|
||||
HEALTHCHECK --interval=30s --timeout=20s --retries=15 \
|
||||
CMD curl --silent 'http://localhost:8000/health/' | grep -q 'OK'
|
||||
# Optional:
|
||||
# HEALTHCHECK --interval=30s --timeout=20s --retries=15 \
|
||||
# CMD curl --silent 'http://localhost:8000/admin/login/' || exit 1
|
||||
|
||||
ENTRYPOINT ["dumb-init", "--", "/app/bin/docker_entrypoint.sh"]
|
||||
CMD ["archivebox", "server", "--quick-init", "0.0.0.0:8000"]
|
||||
|
|
|
@ -1,4 +1 @@
|
|||
__package__ = 'archivebox'
|
||||
|
||||
|
||||
from .monkey_patches import *
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
__package__ = 'abid_utils'
|
|
@ -1,191 +0,0 @@
|
|||
from typing import NamedTuple, Any, Union, Optional
|
||||
|
||||
import ulid
|
||||
import uuid6
|
||||
import hashlib
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from uuid import UUID
|
||||
from typeid import TypeID # type: ignore[import-untyped]
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
|
||||
ABID_PREFIX_LEN = 4
|
||||
ABID_SUFFIX_LEN = 26
|
||||
ABID_LEN = 30
|
||||
ABID_TS_LEN = 10
|
||||
ABID_URI_LEN = 8
|
||||
ABID_SUBTYPE_LEN = 2
|
||||
ABID_RAND_LEN = 6
|
||||
|
||||
DEFAULT_ABID_PREFIX = 'obj_'
|
||||
|
||||
|
||||
class ABID(NamedTuple):
|
||||
"""
|
||||
e.g. ABID('obj_01HX9FPYTRE4A5CCD901ZYEBQE')
|
||||
"""
|
||||
prefix: str # e.g. obj_
|
||||
ts: str # e.g. 01HX9FPYTR
|
||||
uri: str # e.g. E4A5CCD9
|
||||
subtype: str # e.g. 01
|
||||
rand: str # e.g. ZYEBQE
|
||||
|
||||
def __getattr__(self, attr: str) -> Any:
|
||||
return getattr(self.ulid, attr)
|
||||
|
||||
def __eq__(self, other: Any) -> bool:
|
||||
try:
|
||||
return self.ulid == other.ulid
|
||||
except AttributeError:
|
||||
return NotImplemented
|
||||
|
||||
def __str__(self) -> str:
|
||||
return self.prefix + self.suffix
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self.prefix + self.suffix)
|
||||
|
||||
@classmethod
|
||||
def parse(cls, buffer: Union[str, UUID, ulid.ULID, TypeID, 'ABID'], prefix=DEFAULT_ABID_PREFIX) -> 'ABID':
|
||||
assert buffer, f'Attempted to create ABID from null value {buffer}'
|
||||
|
||||
buffer = str(buffer)
|
||||
if '_' in buffer:
|
||||
prefix, suffix = buffer.split('_')
|
||||
else:
|
||||
prefix, suffix = prefix.strip('_'), buffer
|
||||
|
||||
assert len(prefix) == ABID_PREFIX_LEN - 1 # length without trailing _
|
||||
assert len(suffix) == ABID_SUFFIX_LEN, f'Suffix {suffix} from {buffer} was not {ABID_SUFFIX_LEN} chars long'
|
||||
|
||||
return cls(
|
||||
prefix=abid_part_from_prefix(prefix),
|
||||
ts=suffix[0:10].upper(),
|
||||
uri=suffix[10:18].upper(),
|
||||
subtype=suffix[18:20].upper(),
|
||||
rand=suffix[20:26].upper(),
|
||||
)
|
||||
|
||||
@property
|
||||
def suffix(self):
|
||||
return ''.join((self.ts, self.uri, self.subtype, self.rand))
|
||||
|
||||
@property
|
||||
def ulid(self) -> ulid.ULID:
|
||||
return ulid.parse(self.suffix)
|
||||
|
||||
@property
|
||||
def uuid(self) -> UUID:
|
||||
return self.ulid.uuid
|
||||
|
||||
@property
|
||||
def uuid6(self) -> uuid6.UUID:
|
||||
return uuid6.UUID(hex=self.uuid.hex)
|
||||
|
||||
@property
|
||||
def typeid(self) -> TypeID:
|
||||
return TypeID.from_uuid(prefix=self.prefix.strip('_'), suffix=self.uuid6)
|
||||
|
||||
@property
|
||||
def datetime(self) -> datetime:
|
||||
return self.ulid.timestamp().datetime
|
||||
|
||||
|
||||
|
||||
####################################################
|
||||
|
||||
|
||||
def uri_hash(uri: Union[str, bytes]) -> str:
|
||||
"""
|
||||
'E4A5CCD9AF4ED2A6E0954DF19FD274E9CDDB4853051F033FD518BFC90AA1AC25'
|
||||
"""
|
||||
if isinstance(uri, bytes):
|
||||
uri_str: str = uri.decode()
|
||||
else:
|
||||
uri_str = uri
|
||||
|
||||
# only hash the domain part of URLs
|
||||
if '://' in uri_str:
|
||||
try:
|
||||
domain = urlparse(uri_str).netloc
|
||||
if domain:
|
||||
uri_str = domain
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
uri_bytes = uri_str.encode('utf-8')
|
||||
|
||||
return hashlib.sha256(uri_bytes).hexdigest().upper()
|
||||
|
||||
def abid_part_from_prefix(prefix: Optional[str]) -> str:
|
||||
"""
|
||||
'snp_'
|
||||
"""
|
||||
if prefix is None:
|
||||
return 'obj_'
|
||||
|
||||
prefix = prefix.strip('_').lower()
|
||||
assert len(prefix) == 3
|
||||
return prefix + '_'
|
||||
|
||||
def abid_part_from_uri(uri: str) -> str:
|
||||
"""
|
||||
'E4A5CCD9' # takes first 8 characters of sha256(url)
|
||||
"""
|
||||
uri = str(uri)
|
||||
return uri_hash(uri)[:ABID_URI_LEN]
|
||||
|
||||
def abid_part_from_ts(ts: Optional[datetime]) -> str:
|
||||
"""
|
||||
'01HX9FPYTR' # produces 10 character Timestamp section of ulid based on added date
|
||||
"""
|
||||
return str(ulid.from_timestamp(ts) if ts else ulid.new())[:ABID_TS_LEN]
|
||||
|
||||
def abid_part_from_subtype(subtype: str) -> str:
|
||||
"""
|
||||
Snapshots have 01 type, other objects have other subtypes like wget/media/etc.
|
||||
Also allows us to change the ulid spec later by putting special sigil values here.
|
||||
"""
|
||||
subtype = str(subtype)
|
||||
if len(subtype) == ABID_SUBTYPE_LEN:
|
||||
return subtype
|
||||
|
||||
return hashlib.sha256(subtype.encode('utf-8')).hexdigest()[:ABID_SUBTYPE_LEN].upper()
|
||||
|
||||
def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
|
||||
"""
|
||||
'ZYEBQE' # takes last 6 characters of randomness from existing legacy uuid db field
|
||||
"""
|
||||
if rand is None:
|
||||
# if it's None we generate a new random 6 character hex string
|
||||
return str(ulid.new())[-ABID_RAND_LEN:]
|
||||
elif isinstance(rand, UUID):
|
||||
# if it's a uuid we take the last 6 characters of the ULID represation of it
|
||||
return str(ulid.from_uuid(rand))[-ABID_RAND_LEN:]
|
||||
elif isinstance(rand, int):
|
||||
# if it's a BigAutoInteger field we convert it from an int to a 0-padded string
|
||||
rand_str = str(rand)[-ABID_RAND_LEN:]
|
||||
padding_needed = ABID_RAND_LEN - len(rand_str)
|
||||
rand_str = ('0'*padding_needed) + rand_str
|
||||
return rand_str
|
||||
|
||||
# otherwise treat it as a string, take the last 6 characters of it verbatim
|
||||
return str(rand)[-ABID_RAND_LEN:].upper()
|
||||
|
||||
|
||||
def abid_from_values(prefix, ts, uri, subtype, rand) -> ABID:
|
||||
"""
|
||||
Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
|
||||
"""
|
||||
|
||||
abid = ABID(
|
||||
prefix=abid_part_from_prefix(prefix),
|
||||
ts=abid_part_from_ts(ts),
|
||||
uri=abid_part_from_uri(uri),
|
||||
subtype=abid_part_from_subtype(subtype),
|
||||
rand=abid_part_from_rand(rand),
|
||||
)
|
||||
assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for ts={ts} uri={uri} subtyp={subtype} rand={rand}'
|
||||
return abid
|
|
@ -1,7 +0,0 @@
|
|||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class AbidUtilsConfig(AppConfig):
|
||||
default_auto_field = 'django.db.models.BigAutoField'
|
||||
|
||||
name = 'abid_utils'
|
|
@ -1,314 +0,0 @@
|
|||
"""
|
||||
This file provides the Django ABIDField and ABIDModel base model to inherit from.
|
||||
|
||||
It implements the ArchiveBox ID (ABID) interfaces including abid_values, get_abid, .abid, .uuid, .id.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, Union, List, Set, NamedTuple, cast
|
||||
|
||||
from ulid import ULID
|
||||
from uuid import uuid4, UUID
|
||||
from typeid import TypeID # type: ignore[import-untyped]
|
||||
from datetime import datetime
|
||||
from functools import partial
|
||||
from charidfield import CharIDField # type: ignore[import-untyped]
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import models
|
||||
from django.db.utils import OperationalError
|
||||
from django.contrib.auth import get_user_model
|
||||
|
||||
from django_stubs_ext.db.models import TypedModelMeta
|
||||
|
||||
from .abid import (
|
||||
ABID,
|
||||
ABID_LEN,
|
||||
ABID_RAND_LEN,
|
||||
ABID_SUFFIX_LEN,
|
||||
DEFAULT_ABID_PREFIX,
|
||||
abid_part_from_prefix,
|
||||
abid_from_values
|
||||
)
|
||||
|
||||
####################################################
|
||||
|
||||
|
||||
# Database Field for typeid/ulid style IDs with a prefix, e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ
|
||||
ABIDField = partial(
|
||||
CharIDField,
|
||||
max_length=ABID_LEN,
|
||||
help_text="ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)",
|
||||
default=None,
|
||||
null=True,
|
||||
blank=True,
|
||||
db_index=True,
|
||||
unique=True,
|
||||
)
|
||||
|
||||
def get_or_create_system_user_pk(username='system'):
|
||||
"""Get or create a system user with is_superuser=True to be the default owner for new DB rows"""
|
||||
|
||||
User = get_user_model()
|
||||
|
||||
# if only one user exists total, return that user
|
||||
if User.objects.filter(is_superuser=True).count() == 1:
|
||||
return User.objects.filter(is_superuser=True).values_list('pk', flat=True)[0]
|
||||
|
||||
# otherwise, create a dedicated "system" user
|
||||
user, created = User.objects.get_or_create(username=username, is_staff=True, is_superuser=True, defaults={'email': '', 'password': ''})
|
||||
return user.pk
|
||||
|
||||
|
||||
class ABIDModel(models.Model):
|
||||
"""
|
||||
Abstract Base Model for other models to depend on. Provides ArchiveBox ID (ABID) interface.
|
||||
"""
|
||||
abid_prefix: str = DEFAULT_ABID_PREFIX # e.g. 'tag_'
|
||||
abid_ts_src = 'None' # e.g. 'self.created'
|
||||
abid_uri_src = 'None' # e.g. 'self.uri'
|
||||
abid_subtype_src = 'None' # e.g. 'self.extractor'
|
||||
abid_rand_src = 'None' # e.g. 'self.uuid' or 'self.id'
|
||||
|
||||
id = models.UUIDField(primary_key=True, default=uuid4, editable=True)
|
||||
uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
|
||||
abid = ABIDField(prefix=abid_prefix)
|
||||
|
||||
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk)
|
||||
created = models.DateTimeField(auto_now_add=True)
|
||||
modified = models.DateTimeField(auto_now=True)
|
||||
|
||||
class Meta(TypedModelMeta):
|
||||
abstract = True
|
||||
|
||||
def save(self, *args: Any, **kwargs: Any) -> None:
|
||||
if hasattr(self, 'abid'):
|
||||
# self.abid = ABID.parse(self.abid) if self.abid else self.get_abid()
|
||||
self.abid = self.get_abid()
|
||||
else:
|
||||
print(f'[!] WARNING: {self.__class__.__name__}.abid is not a DB field so ABID will not be persisted!')
|
||||
self.abid = self.get_abid()
|
||||
|
||||
super().save(*args, **kwargs)
|
||||
|
||||
@property
|
||||
def abid_values(self) -> Dict[str, Any]:
|
||||
return {
|
||||
'prefix': self.abid_prefix,
|
||||
'ts': eval(self.abid_ts_src),
|
||||
'uri': eval(self.abid_uri_src),
|
||||
'subtype': eval(self.abid_subtype_src),
|
||||
'rand': eval(self.abid_rand_src),
|
||||
}
|
||||
|
||||
def get_abid(self) -> ABID:
|
||||
"""
|
||||
Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
|
||||
"""
|
||||
prefix, ts, uri, subtype, rand = self.abid_values.values()
|
||||
|
||||
if (not prefix) or prefix == DEFAULT_ABID_PREFIX:
|
||||
suggested_abid = self.__class__.__name__[:3].lower()
|
||||
raise Exception(f'{self.__class__.__name__}.abid_prefix must be defined to calculate ABIDs (suggested: {suggested_abid})')
|
||||
|
||||
if not ts:
|
||||
ts = datetime.utcfromtimestamp(0)
|
||||
print(f'[!] WARNING: Generating ABID with ts=0000000000 placeholder because {self.__class__.__name__}.abid_ts_src={self.abid_ts_src} is unset!', ts.isoformat())
|
||||
|
||||
if not uri:
|
||||
uri = str(self)
|
||||
print(f'[!] WARNING: Generating ABID with uri=str(self) placeholder because {self.__class__.__name__}.abid_uri_src={self.abid_uri_src} is unset!', uri)
|
||||
|
||||
if not subtype:
|
||||
subtype = self.__class__.__name__
|
||||
print(f'[!] WARNING: Generating ABID with subtype={subtype} placeholder because {self.__class__.__name__}.abid_subtype_src={self.abid_subtype_src} is unset!', subtype)
|
||||
|
||||
if not rand:
|
||||
rand = getattr(self, 'uuid', None) or getattr(self, 'id', None) or getattr(self, 'pk')
|
||||
print(f'[!] WARNING: Generating ABID with rand=self.id placeholder because {self.__class__.__name__}.abid_rand_src={self.abid_rand_src} is unset!', rand)
|
||||
|
||||
abid = abid_from_values(
|
||||
prefix=prefix,
|
||||
ts=ts,
|
||||
uri=uri,
|
||||
subtype=subtype,
|
||||
rand=rand,
|
||||
)
|
||||
assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for {self.__class__.__name__}'
|
||||
return abid
|
||||
|
||||
@property
|
||||
def ABID(self) -> ABID:
|
||||
"""
|
||||
ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE')
|
||||
"""
|
||||
return ABID.parse(self.abid) if getattr(self, 'abid', None) else self.get_abid()
|
||||
|
||||
@property
|
||||
def ULID(self) -> ULID:
|
||||
"""
|
||||
Get a ulid.ULID representation of the object's ABID.
|
||||
"""
|
||||
return self.ABID.ulid
|
||||
|
||||
@property
|
||||
def UUID(self) -> UUID:
|
||||
"""
|
||||
Get a uuid.UUID (v4) representation of the object's ABID.
|
||||
"""
|
||||
return self.ABID.uuid
|
||||
|
||||
@property
|
||||
def TypeID(self) -> TypeID:
|
||||
"""
|
||||
Get a typeid.TypeID (stripe-style) representation of the object's ABID.
|
||||
"""
|
||||
return self.ABID.typeid
|
||||
|
||||
|
||||
|
||||
####################################################
|
||||
|
||||
# Django helpers
|
||||
def find_all_abid_prefixes() -> Dict[str, type[models.Model]]:
|
||||
"""
|
||||
Return the mapping of all ABID prefixes to their models.
|
||||
e.g. {'tag_': core.models.Tag, 'snp_': core.models.Snapshot, ...}
|
||||
"""
|
||||
import django.apps
|
||||
prefix_map = {}
|
||||
|
||||
for model in django.apps.apps.get_models():
|
||||
abid_prefix = getattr(model, 'abid_prefix', None)
|
||||
if abid_prefix:
|
||||
prefix_map[abid_prefix] = model
|
||||
return prefix_map
|
||||
|
||||
def find_prefix_for_abid(abid: ABID) -> str:
|
||||
"""
|
||||
Find the correct prefix for a given ABID that may have be missing a prefix (slow).
|
||||
e.g. ABID('obj_01BJQMF54D093DXEAWZ6JYRPAQ') -> 'snp_'
|
||||
"""
|
||||
# if existing abid prefix is correct, lookup is easy
|
||||
model = find_model_from_abid(abid)
|
||||
if model:
|
||||
assert issubclass(model, ABIDModel)
|
||||
return model.abid_prefix
|
||||
|
||||
# prefix might be obj_ or missing, fuzzy-search to find any object that matches
|
||||
return find_obj_from_abid_rand(abid)[0].abid_prefix
|
||||
|
||||
def find_model_from_abid_prefix(prefix: str) -> type[ABIDModel] | None:
|
||||
"""
|
||||
Return the Django Model that corresponds to a given ABID prefix.
|
||||
e.g. 'tag_' -> core.models.Tag
|
||||
"""
|
||||
prefix = abid_part_from_prefix(prefix)
|
||||
|
||||
import django.apps
|
||||
|
||||
for model in django.apps.apps.get_models():
|
||||
if not issubclass(model, ABIDModel): continue # skip non-ABID-enabled models
|
||||
if not hasattr(model, 'objects'): continue # skip abstract models
|
||||
|
||||
if (model.abid_prefix == prefix):
|
||||
return model
|
||||
|
||||
return None
|
||||
|
||||
def find_model_from_abid(abid: ABID) -> type[models.Model] | None:
|
||||
"""
|
||||
Shortcut for find_model_from_abid_prefix(abid.prefix)
|
||||
"""
|
||||
return find_model_from_abid_prefix(abid.prefix)
|
||||
|
||||
def find_obj_from_abid_rand(rand: Union[ABID, str], model=None) -> List[ABIDModel]:
|
||||
"""
|
||||
Find an object corresponding to an ABID by exhaustively searching using its random suffix (slow).
|
||||
e.g. 'obj_....................JYRPAQ' -> Snapshot('snp_01BJQMF54D093DXEAWZ6JYRPAQ')
|
||||
"""
|
||||
|
||||
# convert str to ABID if necessary
|
||||
if isinstance(rand, ABID):
|
||||
abid: ABID = rand
|
||||
else:
|
||||
rand = str(rand)
|
||||
if len(rand) < ABID_SUFFIX_LEN:
|
||||
padding_needed = ABID_SUFFIX_LEN - len(rand)
|
||||
rand = ('0'*padding_needed) + rand
|
||||
abid = ABID.parse(rand)
|
||||
|
||||
import django.apps
|
||||
|
||||
partial_matches: List[ABIDModel] = []
|
||||
|
||||
models_to_try = cast(Set[type[models.Model]], set(filter(bool, (
|
||||
model,
|
||||
find_model_from_abid(abid),
|
||||
*django.apps.apps.get_models(),
|
||||
))))
|
||||
# print(abid, abid.rand, abid.uuid, models_to_try)
|
||||
|
||||
for model in models_to_try:
|
||||
if not issubclass(model, ABIDModel): continue # skip Models that arent ABID-enabled
|
||||
if not hasattr(model, 'objects'): continue # skip abstract Models
|
||||
assert hasattr(model, 'objects') # force-fix for type hint nit about missing manager https://github.com/typeddjango/django-stubs/issues/1684
|
||||
|
||||
# continue on to try fuzzy searching by randomness portion derived from uuid field
|
||||
try:
|
||||
qs = []
|
||||
if hasattr(model, 'abid'):
|
||||
qs = model.objects.filter(abid__endswith=abid.rand)
|
||||
elif hasattr(model, 'uuid'):
|
||||
qs = model.objects.filter(uuid__endswith=str(abid.uuid)[-ABID_RAND_LEN:])
|
||||
elif hasattr(model, 'id'):
|
||||
# NOTE: this only works on SQLite where every column is a string
|
||||
# other DB backends like postgres dont let you do __endswith if this is a BigAutoInteger field
|
||||
|
||||
# try to search for uuid=...-2354352
|
||||
# try to search for id=...2354352
|
||||
# try to search for id=2354352
|
||||
qs = model.objects.filter(
|
||||
models.Q(id__endswith=str(abid.uuid)[-ABID_RAND_LEN:])
|
||||
| models.Q(id__endswith=abid.rand)
|
||||
| models.Q(id__startswith=str(int(abid.rand)) if abid.rand.isdigit() else abid.rand)
|
||||
)
|
||||
|
||||
for obj in qs:
|
||||
if obj.get_abid() == abid:
|
||||
# found exact match, no need to keep iterating
|
||||
return [obj]
|
||||
partial_matches.append(obj)
|
||||
except OperationalError as err:
|
||||
print(f'[!] WARNING: Got error while trying to iterate through QuerySet for {model}:', err, '\n')
|
||||
|
||||
return partial_matches
|
||||
|
||||
def find_obj_from_abid(abid: ABID, model=None, fuzzy=False) -> Any:
|
||||
"""
|
||||
Find an object with a given ABID by filtering possible models for a matching abid/uuid/id (fast).
|
||||
e.g. 'snp_01BJQMF54D093DXEAWZ6JYRPAQ' -> Snapshot('snp_01BJQMF54D093DXEAWZ6JYRPAQ')
|
||||
"""
|
||||
|
||||
model = model or find_model_from_abid(abid)
|
||||
assert model, f'Could not find model that could match this ABID type: {abid}'
|
||||
|
||||
try:
|
||||
if hasattr(model, 'abid'):
|
||||
return model.objects.get(abid__endswith=abid.suffix)
|
||||
if hasattr(model, 'uuid'):
|
||||
return model.objects.get(uuid=abid.uuid)
|
||||
return model.objects.get(id=abid.uuid)
|
||||
except model.DoesNotExist:
|
||||
# if the model has an abid field then it shouldve matched, pointless to fuzzy search in that case
|
||||
if hasattr(model, 'abid') or (not fuzzy):
|
||||
raise
|
||||
|
||||
# continue on to try fuzzy searching by randomness portion derived from uuid field
|
||||
match_by_rand = find_obj_from_abid_rand(abid, model=model)
|
||||
if match_by_rand:
|
||||
if match_by_rand[0].abid_prefix != abid.prefix:
|
||||
print(f'[!] WARNING: fetched object {match_by_rand} even though prefix {abid.prefix} doesnt match!', abid, '\n')
|
||||
return match_by_rand
|
||||
|
||||
raise model.DoesNotExist
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
from django.test import TestCase
|
||||
|
||||
# Create your tests here.
|
|
@ -1 +0,0 @@
|
|||
__package__ = 'archivebox.api'
|
|
@ -1,11 +0,0 @@
|
|||
__package__ = 'archivebox.api'
|
||||
|
||||
from django.apps import AppConfig
|
||||
|
||||
|
||||
|
||||
class APIConfig(AppConfig):
|
||||
name = 'api'
|
||||
|
||||
def ready(self):
|
||||
pass
|
|
@ -1,107 +0,0 @@
|
|||
__package__ = 'archivebox.api'
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from django.http import HttpRequest
|
||||
from django.contrib.auth import login
|
||||
from django.contrib.auth import authenticate
|
||||
from django.contrib.auth.models import AbstractBaseUser
|
||||
|
||||
from ninja.security import HttpBearer, APIKeyQuery, APIKeyHeader, HttpBasicAuth, django_auth_superuser
|
||||
|
||||
|
||||
def auth_using_token(token, request: Optional[HttpRequest]=None) -> Optional[AbstractBaseUser]:
|
||||
"""Given an API token string, check if a corresponding non-expired APIToken exists, and return its user"""
|
||||
from api.models import APIToken # lazy import model to avoid loading it at urls.py import time
|
||||
|
||||
user = None
|
||||
|
||||
submitted_empty_form = token in ('string', '', None)
|
||||
if submitted_empty_form:
|
||||
user = request.user # see if user is authed via django session and use that as the default
|
||||
else:
|
||||
try:
|
||||
token = APIToken.objects.get(token=token)
|
||||
if token.is_valid():
|
||||
user = token.user
|
||||
except APIToken.DoesNotExist:
|
||||
pass
|
||||
|
||||
if not user:
|
||||
print('[❌] Failed to authenticate API user using API Key:', request)
|
||||
|
||||
return None
|
||||
|
||||
def auth_using_password(username, password, request: Optional[HttpRequest]=None) -> Optional[AbstractBaseUser]:
|
||||
"""Given a username and password, check if they are valid and return the corresponding user"""
|
||||
user = None
|
||||
|
||||
submitted_empty_form = (username, password) in (('string', 'string'), ('', ''), (None, None))
|
||||
if submitted_empty_form:
|
||||
user = request.user # see if user is authed via django session and use that as the default
|
||||
else:
|
||||
user = authenticate(
|
||||
username=username,
|
||||
password=password,
|
||||
)
|
||||
|
||||
if not user:
|
||||
print('[❌] Failed to authenticate API user using API Key:', request)
|
||||
|
||||
return user
|
||||
|
||||
|
||||
### Base Auth Types
|
||||
|
||||
class APITokenAuthCheck:
|
||||
"""The base class for authentication methods that use an api.models.APIToken"""
|
||||
def authenticate(self, request: HttpRequest, key: Optional[str]=None) -> Optional[AbstractBaseUser]:
|
||||
user = auth_using_token(
|
||||
token=key,
|
||||
request=request,
|
||||
)
|
||||
if user is not None:
|
||||
login(request, user, backend='django.contrib.auth.backends.ModelBackend')
|
||||
return user
|
||||
|
||||
class UserPassAuthCheck:
|
||||
"""The base class for authentication methods that use a username & password"""
|
||||
def authenticate(self, request: HttpRequest, username: Optional[str]=None, password: Optional[str]=None) -> Optional[AbstractBaseUser]:
|
||||
user = auth_using_password(
|
||||
username=username,
|
||||
password=password,
|
||||
request=request,
|
||||
)
|
||||
if user is not None:
|
||||
login(request, user, backend='django.contrib.auth.backends.ModelBackend')
|
||||
return user
|
||||
|
||||
|
||||
### Django-Ninja-Provided Auth Methods
|
||||
|
||||
class HeaderTokenAuth(APITokenAuthCheck, APIKeyHeader):
|
||||
"""Allow authenticating by passing X-API-Key=xyz as a request header"""
|
||||
param_name = "X-ArchiveBox-API-Key"
|
||||
|
||||
class BearerTokenAuth(APITokenAuthCheck, HttpBearer):
|
||||
"""Allow authenticating by passing Bearer=xyz as a request header"""
|
||||
pass
|
||||
|
||||
class QueryParamTokenAuth(APITokenAuthCheck, APIKeyQuery):
|
||||
"""Allow authenticating by passing api_key=xyz as a GET/POST query parameter"""
|
||||
param_name = "api_key"
|
||||
|
||||
class UsernameAndPasswordAuth(UserPassAuthCheck, HttpBasicAuth):
|
||||
"""Allow authenticating by passing username & password via HTTP Basic Authentication (not recommended)"""
|
||||
pass
|
||||
|
||||
|
||||
### Enabled Auth Methods
|
||||
|
||||
API_AUTH_METHODS = [
|
||||
HeaderTokenAuth(),
|
||||
BearerTokenAuth(),
|
||||
QueryParamTokenAuth(),
|
||||
django_auth_superuser,
|
||||
UsernameAndPasswordAuth(),
|
||||
]
|
|
@ -1,29 +0,0 @@
|
|||
# Generated by Django 4.2.11 on 2024-04-25 04:19
|
||||
|
||||
import api.models
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
import uuid
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
initial = True
|
||||
|
||||
dependencies = [
|
||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name='APIToken',
|
||||
fields=[
|
||||
('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
|
||||
('token', models.CharField(default=api.models.generate_secret_token, max_length=32, unique=True)),
|
||||
('created', models.DateTimeField(auto_now_add=True)),
|
||||
('expires', models.DateTimeField(blank=True, null=True)),
|
||||
('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
|
||||
],
|
||||
),
|
||||
]
|
|
@ -1,17 +0,0 @@
|
|||
# Generated by Django 5.0.4 on 2024-04-26 05:28
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('api', '0001_initial'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterModelOptions(
|
||||
name='apitoken',
|
||||
options={'verbose_name': 'API Key', 'verbose_name_plural': 'API Keys'},
|
||||
),
|
||||
]
|
|
@ -1,77 +0,0 @@
|
|||
# Generated by Django 5.0.6 on 2024-06-03 01:52
|
||||
|
||||
import abid_utils.models
|
||||
import charidfield.fields
|
||||
import django.db.models.deletion
|
||||
import signal_webhooks.fields
|
||||
import signal_webhooks.utils
|
||||
import uuid
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('api', '0002_alter_apitoken_options'),
|
||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RenameField(
|
||||
model_name='apitoken',
|
||||
old_name='user',
|
||||
new_name='created_by',
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='apitoken',
|
||||
name='abid',
|
||||
field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='apt_', unique=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='apitoken',
|
||||
name='modified',
|
||||
field=models.DateTimeField(auto_now=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='apitoken',
|
||||
name='uuid',
|
||||
field=models.UUIDField(blank=True, null=True, unique=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='apitoken',
|
||||
name='id',
|
||||
field=models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False),
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name='OutboundWebhook',
|
||||
fields=[
|
||||
('name', models.CharField(db_index=True, help_text='Give your webhook a descriptive name (e.g. Notify ACME Slack channel of any new ArchiveResults).', max_length=255, unique=True, verbose_name='name')),
|
||||
('signal', models.CharField(choices=[('CREATE', 'Create'), ('UPDATE', 'Update'), ('DELETE', 'Delete'), ('M2M', 'M2M changed'), ('CREATE_OR_UPDATE', 'Create or Update'), ('CREATE_OR_DELETE', 'Create or Delete'), ('CREATE_OR_M2M', 'Create or M2M changed'), ('UPDATE_OR_DELETE', 'Update or Delete'), ('UPDATE_OR_M2M', 'Update or M2M changed'), ('DELETE_OR_M2M', 'Delete or M2M changed'), ('CREATE_UPDATE_OR_DELETE', 'Create, Update or Delete'), ('CREATE_UPDATE_OR_M2M', 'Create, Update or M2M changed'), ('CREATE_DELETE_OR_M2M', 'Create, Delete or M2M changed'), ('UPDATE_DELETE_OR_M2M', 'Update, Delete or M2M changed'), ('CREATE_UPDATE_DELETE_OR_M2M', 'Create, Update or Delete, or M2M changed')], help_text='The type of event the webhook should fire for (e.g. Create, Update, Delete).', max_length=255, verbose_name='signal')),
|
||||
('ref', models.CharField(db_index=True, help_text='Dot import notation of the model the webhook should fire for (e.g. core.models.Snapshot or core.models.ArchiveResult).', max_length=1023, validators=[signal_webhooks.utils.model_from_reference], verbose_name='referenced model')),
|
||||
('endpoint', models.URLField(help_text='External URL to POST the webhook notification to (e.g. https://someapp.example.com/webhook/some-webhook-receiver).', max_length=2047, verbose_name='endpoint')),
|
||||
('headers', models.JSONField(blank=True, default=dict, help_text='Headers to send with the webhook request.', validators=[signal_webhooks.utils.is_dict], verbose_name='headers')),
|
||||
('auth_token', signal_webhooks.fields.TokenField(blank=True, default='', help_text='Authentication token to use in an Authorization header.', max_length=8000, validators=[signal_webhooks.utils.decode_cipher_key], verbose_name='authentication token')),
|
||||
('enabled', models.BooleanField(default=True, help_text='Is this webhook enabled?', verbose_name='enabled')),
|
||||
('keep_last_response', models.BooleanField(default=False, help_text='Should the webhook keep a log of the latest response it got?', verbose_name='keep last response')),
|
||||
('updated', models.DateTimeField(auto_now=True, help_text='When the webhook was last updated.', verbose_name='updated')),
|
||||
('last_response', models.CharField(blank=True, default='', help_text='Latest response to this webhook.', max_length=8000, verbose_name='last response')),
|
||||
('last_success', models.DateTimeField(default=None, help_text='When the webhook last succeeded.', null=True, verbose_name='last success')),
|
||||
('last_failure', models.DateTimeField(default=None, help_text='When the webhook last failed.', null=True, verbose_name='last failure')),
|
||||
('created', models.DateTimeField(auto_now_add=True)),
|
||||
('modified', models.DateTimeField(auto_now=True)),
|
||||
('id', models.UUIDField(blank=True, null=True, unique=True)),
|
||||
('uuid', models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False)),
|
||||
('abid', charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='whk_', unique=True)),
|
||||
('created_by', models.ForeignKey(default=abid_utils.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
|
||||
],
|
||||
options={
|
||||
'verbose_name': 'API Outbound Webhook',
|
||||
'abstract': False,
|
||||
},
|
||||
),
|
||||
migrations.AddConstraint(
|
||||
model_name='outboundwebhook',
|
||||
constraint=models.UniqueConstraint(fields=('ref', 'endpoint'), name='prevent_duplicate_hooks_api_outboundwebhook'),
|
||||
),
|
||||
]
|
|
@@ -1,115 +0,0 @@
__package__ = 'archivebox.api'

import uuid
import secrets
from datetime import timedelta

from django.conf import settings
from django.db import models
from django.utils import timezone

from signal_webhooks.models import WebhookBase

from django_stubs_ext.db.models import TypedModelMeta

from abid_utils.models import ABIDModel, ABIDField


def generate_secret_token() -> str:
    # returns cryptographically secure string with len() == 32
    return secrets.token_hex(16)


class APIToken(ABIDModel):
    """
    A secret key generated by a User that's used to authenticate REST API requests to ArchiveBox.
    """
    # ABID: apt_<created_ts>_<token_hash>_<user_id_hash>_<uuid_rand>
    abid_prefix = 'apt_'
    abid_ts_src = 'self.created'
    abid_uri_src = 'self.token'
    abid_subtype_src = 'self.user_id'
    abid_rand_src = 'self.id'

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True)
    uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
    abid = ABIDField(prefix=abid_prefix)

    created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
    token = models.CharField(max_length=32, default=generate_secret_token, unique=True)

    created = models.DateTimeField(auto_now_add=True)
    expires = models.DateTimeField(null=True, blank=True)


    class Meta(TypedModelMeta):
        verbose_name = "API Key"
        verbose_name_plural = "API Keys"

    def __str__(self) -> str:
        return self.token

    def __repr__(self) -> str:
        return f'<APIToken user={self.user.username} token=************{self.token[-4:]}>'

    def __json__(self) -> dict:
        return {
            "TYPE": "APIToken",
            "uuid": str(self.id),
            "abid": str(self.get_abid()),
            "user_id": str(self.user.id),
            "user_username": self.user.username,
            "token": self.token,
            "created": self.created.isoformat(),
            "expires": self.expires_as_iso8601,
        }

    @property
    def expires_as_iso8601(self):
        """Returns the expiry date of the token in ISO 8601 format or a date 100 years in the future if none."""
        expiry_date = self.expires or (timezone.now() + timedelta(days=365 * 100))

        return expiry_date.isoformat()

    def is_valid(self, for_date=None):
        for_date = for_date or timezone.now()

        if self.expires and self.expires < for_date:
            return False

        return True






# monkey patch django-signals-webhooks to change how it shows up in Admin UI

class OutboundWebhook(ABIDModel, WebhookBase):
    """
    Model used in place of (extending) signals_webhooks.models.WebhookModel. Swapped using:
    settings.SIGNAL_WEBHOOKS_CUSTOM_MODEL = 'api.models.OutboundWebhook'
    """
    abid_prefix = 'whk_'
    abid_ts_src = 'self.created'
    abid_uri_src = 'self.endpoint'
    abid_subtype_src = 'self.ref'
    abid_rand_src = 'self.id'

    id = models.UUIDField(blank=True, null=True, unique=True, editable=True)
    uuid = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True)
    abid = ABIDField(prefix=abid_prefix)

    WebhookBase._meta.get_field('name').help_text = (
        'Give your webhook a descriptive name (e.g. Notify ACME Slack channel of any new ArchiveResults).')
    WebhookBase._meta.get_field('signal').help_text = (
        'The type of event the webhook should fire for (e.g. Create, Update, Delete).')
    WebhookBase._meta.get_field('ref').help_text = (
        'Dot import notation of the model the webhook should fire for (e.g. core.models.Snapshot or core.models.ArchiveResult).')
    WebhookBase._meta.get_field('endpoint').help_text = (
        'External URL to POST the webhook notification to (e.g. https://someapp.example.com/webhook/some-webhook-receiver).')

    class Meta(WebhookBase.Meta):
        verbose_name = 'API Outbound Webhook'

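As a quick sanity check of the token logic above (the secrets.token_hex(16) default and the expiry comparison in is_valid()), here is a minimal standalone sketch; it uses plain datetimes in place of Django's timezone helpers and is illustrative only, not part of the changeset:

import secrets
from datetime import datetime, timedelta, timezone

token = secrets.token_hex(16)   # same call as generate_secret_token() above: 32 hex characters
assert len(token) == 32

def is_valid(expires, for_date=None):
    # mirrors APIToken.is_valid(): only invalid once `expires` is set and already in the past
    for_date = for_date or datetime.now(timezone.utc)
    if expires and expires < for_date:
        return False
    return True

print(token, is_valid(datetime.now(timezone.utc) + timedelta(days=30)))   # -> <hex token> True
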
@@ -1,30 +0,0 @@
__package__ = 'archivebox.api'

from django.test import TestCase
from ninja.testing import TestClient

from .routes_cli import router

class ArchiveBoxCLIAPITestCase(TestCase):
    def setUp(self):
        self.client = TestClient(router)

    def test_add_endpoint(self):
        response = self.client.post("/add", json={"urls": ["http://example.com"], "tag": "testTag1,testTag2"})
        self.assertEqual(response.status_code, 200)
        self.assertTrue(response.json()["success"])

    def test_remove_endpoint(self):
        response = self.client.post("/remove", json={"filter_patterns": ["http://example.com"]})
        self.assertEqual(response.status_code, 200)
        self.assertTrue(response.json()["success"])

    def test_update_endpoint(self):
        response = self.client.post("/update", json={})
        self.assertEqual(response.status_code, 200)
        self.assertTrue(response.json()["success"])

    def test_list_all_endpoint(self):
        response = self.client.post("/list_all", json={})
        self.assertEqual(response.status_code, 200)
        self.assertTrue(response.json()["success"])
@@ -1,17 +0,0 @@
__package__ = 'archivebox.api'

from django.urls import path
from django.views.generic.base import RedirectView

from .v1_api import urls as v1_api_urls

urlpatterns = [
    path("", RedirectView.as_view(url='/api/v1')),

    path("v1/", v1_api_urls),
    path("v1", RedirectView.as_view(url='/api/v1/docs')),

    # ... v2 can be added here ...
    # path("v2/", v2_api_urls),
    # path("v2", RedirectView.as_view(url='/api/v2/docs')),
]
@@ -1,111 +0,0 @@
__package__ = 'archivebox.api'


from io import StringIO
from traceback import format_exception
from contextlib import redirect_stdout, redirect_stderr

from django.http import HttpRequest, HttpResponse
from django.core.exceptions import ObjectDoesNotExist, EmptyResultSet, PermissionDenied

from ninja import NinjaAPI, Swagger

# TODO: explore adding https://eadwincode.github.io/django-ninja-extra/

from api.auth import API_AUTH_METHODS
from ..config import VERSION, COMMIT_HASH


COMMIT_HASH = COMMIT_HASH or 'unknown'

html_description=f'''
<h3>Welcome to your ArchiveBox server's REST API <code>[v1 ALPHA]</code> homepage!</h3>
<br/>
<i><b>WARNING: This API is still in an early development stage and may change!</b></i>
<br/>
<ul>
<li>⬅️ Manage your server: <a href="/admin/api/"><b>Setup API Keys</b></a>, <a href="/admin/">Go to your Server Admin UI</a>, <a href="/">Go to your Snapshots list</a>
<li>💬 Ask questions and get help here: <a href="https://zulip.archivebox.io">ArchiveBox Chat Forum</a></li>
<li>🐞 Report API bugs here: <a href="https://github.com/ArchiveBox/ArchiveBox/issues">Github Issues</a></li>
<li>📚 ArchiveBox Documentation: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki">Github Wiki</a></li>
<li>📜 See the API source code: <a href="https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/api"><code>archivebox/api/</code></a></li>
</ul>
<small>Served by ArchiveBox v{VERSION} (<a href="https://github.com/ArchiveBox/ArchiveBox/commit/{COMMIT_HASH}"><code>{COMMIT_HASH[:8]}</code></a>), API powered by <a href="https://django-ninja.dev/"><code>django-ninja</code></a>.</small>
'''


def register_urls(api: NinjaAPI) -> NinjaAPI:
    api.add_router('/auth/', 'api.v1_auth.router')
    api.add_router('/core/', 'api.v1_core.router')
    api.add_router('/cli/', 'api.v1_cli.router')
    return api


class NinjaAPIWithIOCapture(NinjaAPI):
    def create_temporal_response(self, request: HttpRequest) -> HttpResponse:
        stdout, stderr = StringIO(), StringIO()

        with redirect_stderr(stderr):
            with redirect_stdout(stdout):
                request.stdout = stdout
                request.stderr = stderr

                response = super().create_temporal_response(request)

        print('RESPONDING NOW', response)

        return response


api = NinjaAPIWithIOCapture(
    title='ArchiveBox API',
    description=html_description,
    version='1.0.0',
    csrf=False,
    auth=API_AUTH_METHODS,
    urls_namespace="api",
    docs=Swagger(settings={"persistAuthorization": True}),
    # docs_decorator=login_required,
    # renderer=ORJSONRenderer(),
)
api = register_urls(api)
urls = api.urls


@api.exception_handler(Exception)
def generic_exception_handler(request, err):
    status = 503
    if isinstance(err, (ObjectDoesNotExist, EmptyResultSet, PermissionDenied)):
        status = 404

    print(''.join(format_exception(err)))

    return api.create_response(
        request,
        {
            "succeeded": False,
            "message": f'{err.__class__.__name__}: {err}',
            "errors": [
                ''.join(format_exception(err)),
                # or send simpler parent-only traceback:
                # *([str(err.__context__)] if getattr(err, '__context__', None) else []),
            ],
        },
        status=status,
    )



# import orjson
# from ninja.renderers import BaseRenderer
# class ORJSONRenderer(BaseRenderer):
#     media_type = "application/json"
#     def render(self, request, data, *, response_status):
#         return {
#             "success": True,
#             "errors": [],
#             "result": data,
#             "stdout": ansi_to_html(stdout.getvalue().strip()),
#             "stderr": ansi_to_html(stderr.getvalue().strip()),
#         }
#         return orjson.dumps(data)
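Given the generic_exception_handler above, a failed request returns a JSON body shaped roughly like the following (status 404 for missing objects, 503 otherwise); the values here are illustrative only:

# illustrative response body produced by generic_exception_handler above
example_error_response = {
    "succeeded": False,
    "message": "ObjectDoesNotExist: Snapshot matching query does not exist.",
    "errors": ["Traceback (most recent call last): ..."],
}
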
@@ -1,52 +0,0 @@
__package__ = 'archivebox.api'

from typing import Optional

from ninja import Router, Schema

from api.models import APIToken
from api.auth import auth_using_token, auth_using_password


router = Router(tags=['Authentication'])


class PasswordAuthSchema(Schema):
    """Schema for a /get_api_token request"""
    username: Optional[str] = None
    password: Optional[str] = None


@router.post("/get_api_token", auth=None, summary='Generate an API token for a given username & password (or currently logged-in user)')   # auth=None because they are not authed yet
def get_api_token(request, auth_data: PasswordAuthSchema):
    user = auth_using_password(
        username=auth_data.username,
        password=auth_data.password,
        request=request,
    )

    if user:
        # TODO: support multiple tokens in the future, for now we just have one per user
        api_token, created = APIToken.objects.get_or_create(user=user)

        return api_token.__json__()

    return {"success": False, "errors": ["Invalid credentials"]}



class TokenAuthSchema(Schema):
    """Schema for a /check_api_token request"""
    token: str


@router.post("/check_api_token", auth=None, summary='Validate an API token to make sure its valid and non-expired')   # auth=None because they are not authed yet
def check_api_token(request, token_data: TokenAuthSchema):
    user = auth_using_token(
        token=token_data.token,
        request=request,
    )
    if user:
        return {"success": True, "user_id": str(user.pk)}

    return {"success": False, "user_id": None}
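For context, the two endpoints above can be exercised with a plain HTTP client; this is only an illustrative sketch, it assumes a server reachable at localhost:8000 (the /api/v1 mount point comes from the urls.py shown earlier) and example credentials:

# illustrative only: assumes an ArchiveBox server at localhost:8000 and valid credentials
import requests

BASE = 'http://localhost:8000/api/v1'   # auth router is mounted at /api/v1/auth/ by register_urls()

resp = requests.post(f'{BASE}/auth/get_api_token', json={'username': 'admin', 'password': 'changeme'})
token = resp.json().get('token')        # __json__() payload from APIToken, or {"success": False, ...} on bad credentials

check = requests.post(f'{BASE}/auth/check_api_token', json={'token': token})
print(check.json())                     # {"success": True, "user_id": "..."} while the token is valid and unexpired
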
@@ -1,234 +0,0 @@
__package__ = 'archivebox.api'

from typing import List, Dict, Any, Optional
from enum import Enum

from ninja import Router, Schema

from ..main import (
    add,
    remove,
    update,
    list_all,
    schedule,
)
from ..util import ansi_to_html
from ..config import ONLY_NEW


# router for API that exposes archivebox cli subcommands as REST endpoints
router = Router(tags=['ArchiveBox CLI Sub-Commands'])


# Schemas

JSONType = List[Any] | Dict[str, Any] | bool | int | str | None

class CLICommandResponseSchema(Schema):
    success: bool
    errors: List[str]
    result: JSONType
    stdout: str
    stderr: str

class FilterTypeChoices(str, Enum):
    exact = 'exact'
    substring = 'substring'
    regex = 'regex'
    domain = 'domain'
    tag = 'tag'
    timestamp = 'timestamp'

class StatusChoices(str, Enum):
    indexed = 'indexed'
    archived = 'archived'
    unarchived = 'unarchived'
    present = 'present'
    valid = 'valid'
    invalid = 'invalid'
    duplicate = 'duplicate'
    orphaned = 'orphaned'
    corrupted = 'corrupted'
    unrecognized = 'unrecognized'


class AddCommandSchema(Schema):
    urls: List[str]
    tag: str = ""
    depth: int = 0
    update: bool = not ONLY_NEW  # Default to the opposite of ONLY_NEW
    update_all: bool = False
    index_only: bool = False
    overwrite: bool = False
    init: bool = False
    extractors: str = ""
    parser: str = "auto"

class UpdateCommandSchema(Schema):
    resume: Optional[float] = 0
    only_new: bool = ONLY_NEW
    index_only: bool = False
    overwrite: bool = False
    after: Optional[float] = 0
    before: Optional[float] = 999999999999999
    status: Optional[StatusChoices] = StatusChoices.unarchived
    filter_type: Optional[str] = FilterTypeChoices.substring
    filter_patterns: Optional[List[str]] = ['https://example.com']
    extractors: Optional[str] = ""

class ScheduleCommandSchema(Schema):
    import_path: Optional[str] = None
    add: bool = False
    every: Optional[str] = None
    tag: str = ''
    depth: int = 0
    overwrite: bool = False
    update: bool = not ONLY_NEW
    clear: bool = False

class ListCommandSchema(Schema):
    filter_patterns: Optional[List[str]] = ['https://example.com']
    filter_type: str = FilterTypeChoices.substring
    status: Optional[StatusChoices] = StatusChoices.indexed
    after: Optional[float] = 0
    before: Optional[float] = 999999999999999
    sort: str = 'added'
    as_json: bool = True
    as_html: bool = False
    as_csv: str | bool = 'timestamp,url'
    with_headers: bool = False

class RemoveCommandSchema(Schema):
    delete: bool = True
    after: Optional[float] = 0
    before: Optional[float] = 999999999999999
    filter_type: str = FilterTypeChoices.exact
    filter_patterns: Optional[List[str]] = ['https://example.com']





@router.post("/add", response=CLICommandResponseSchema, summary='archivebox add [args] [urls]')
def cli_add(request, args: AddCommandSchema):
    result = add(
        urls=args.urls,
        tag=args.tag,
        depth=args.depth,
        update=args.update,
        update_all=args.update_all,
        index_only=args.index_only,
        overwrite=args.overwrite,
        init=args.init,
        extractors=args.extractors,
        parser=args.parser,
    )

    return {
        "success": True,
        "errors": [],
        "result": result,
        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
    }


@router.post("/update", response=CLICommandResponseSchema, summary='archivebox update [args] [filter_patterns]')
def cli_update(request, args: UpdateCommandSchema):
    result = update(
        resume=args.resume,
        only_new=args.only_new,
        index_only=args.index_only,
        overwrite=args.overwrite,
        before=args.before,
        after=args.after,
        status=args.status,
        filter_type=args.filter_type,
        filter_patterns=args.filter_patterns,
        extractors=args.extractors,
    )
    return {
        "success": True,
        "errors": [],
        "result": result,
        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
    }


@router.post("/schedule", response=CLICommandResponseSchema, summary='archivebox schedule [args] [import_path]')
def cli_schedule(request, args: ScheduleCommandSchema):
    result = schedule(
        import_path=args.import_path,
        add=args.add,
        show=args.show,
        clear=args.clear,
        every=args.every,
        tag=args.tag,
        depth=args.depth,
        overwrite=args.overwrite,
        update=args.update,
    )

    return {
        "success": True,
        "errors": [],
        "result": result,
        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
    }



@router.post("/list", response=CLICommandResponseSchema, summary='archivebox list [args] [filter_patterns]')
def cli_list(request, args: ListCommandSchema):
    result = list_all(
        filter_patterns=args.filter_patterns,
        filter_type=args.filter_type,
        status=args.status,
        after=args.after,
        before=args.before,
        sort=args.sort,
        csv=args.as_csv,
        json=args.as_json,
        html=args.as_html,
        with_headers=args.with_headers,
    )

    result_format = 'txt'
    if args.as_json:
        result_format = "json"
    elif args.as_html:
        result_format = "html"
    elif args.as_csv:
        result_format = "csv"

    return {
        "success": True,
        "errors": [],
        "result": result,
        "result_format": result_format,
        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
    }



@router.post("/remove", response=CLICommandResponseSchema, summary='archivebox remove [args] [filter_patterns]')
def cli_remove(request, args: RemoveCommandSchema):
    result = remove(
        yes=True,  # no way to interactively ask for confirmation via API, so we force yes
        delete=args.delete,
        before=args.before,
        after=args.after,
        filter_type=args.filter_type,
        filter_patterns=args.filter_patterns,
    )
    return {
        "success": True,
        "errors": [],
        "result": result,
        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
    }
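A request against the /cli/ router above would look roughly like the following sketch; it is illustrative only, assumes the same localhost:8000 server, and omits authentication because the header or parameter used by api.auth is not shown in this diff:

# illustrative only: fields come from AddCommandSchema, response keys from CLICommandResponseSchema
import requests

payload = {'urls': ['https://example.com'], 'tag': 'docs,example', 'depth': 0}
resp = requests.post('http://localhost:8000/api/v1/cli/add', json=payload)
body = resp.json()                       # {"success": ..., "errors": [...], "result": ..., "stdout": ..., "stderr": ...}
print(body['success'], body['stdout'][:200])
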
@@ -1,291 +0,0 @@
__package__ = 'archivebox.api'

from uuid import UUID
from typing import List, Optional
from datetime import datetime

from django.db.models import Q
from django.shortcuts import get_object_or_404

from ninja import Router, Schema, FilterSchema, Field, Query
from ninja.pagination import paginate

from core.models import Snapshot, ArchiveResult, Tag
from abid_utils.abid import ABID

router = Router(tags=['Core Models'])




### ArchiveResult #########################################################################

class ArchiveResultSchema(Schema):
    abid: str
    uuid: UUID
    pk: str
    modified: datetime
    created: datetime
    created_by_id: str

    snapshot_abid: str
    snapshot_url: str
    snapshot_tags: str

    extractor: str
    cmd_version: str
    cmd: List[str]
    pwd: str
    status: str
    output: str

    @staticmethod
    def resolve_created_by_id(obj):
        return str(obj.created_by_id)

    @staticmethod
    def resolve_pk(obj):
        return str(obj.pk)

    @staticmethod
    def resolve_uuid(obj):
        return str(obj.uuid)

    @staticmethod
    def resolve_abid(obj):
        return str(obj.ABID)

    @staticmethod
    def resolve_created(obj):
        return obj.start_ts

    @staticmethod
    def resolve_snapshot_url(obj):
        return obj.snapshot.url

    @staticmethod
    def resolve_snapshot_abid(obj):
        return str(obj.snapshot.ABID)

    @staticmethod
    def resolve_snapshot_tags(obj):
        return obj.snapshot.tags_str()


class ArchiveResultFilterSchema(FilterSchema):
    uuid: Optional[UUID] = Field(None, q='uuid')
    # abid: Optional[str] = Field(None, q='abid')

    search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains'])
    snapshot_uuid: Optional[UUID] = Field(None, q='snapshot_uuid__icontains')
    snapshot_url: Optional[str] = Field(None, q='snapshot__url__icontains')
    snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name__icontains')

    status: Optional[str] = Field(None, q='status')
    output: Optional[str] = Field(None, q='output__icontains')
    extractor: Optional[str] = Field(None, q='extractor__icontains')
    cmd: Optional[str] = Field(None, q='cmd__0__icontains')
    pwd: Optional[str] = Field(None, q='pwd__icontains')
    cmd_version: Optional[str] = Field(None, q='cmd_version')

    created: Optional[datetime] = Field(None, q='updated')
    created__gte: Optional[datetime] = Field(None, q='updated__gte')
    created__lt: Optional[datetime] = Field(None, q='updated__lt')


@router.get("/archiveresults", response=List[ArchiveResultSchema])
@paginate
def list_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)):
    """List all ArchiveResult entries matching these filters."""
    qs = ArchiveResult.objects.all()
    results = filters.filter(qs)
    return results


@router.get("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema)
def get_archiveresult(request, archiveresult_id: str):
    """Get a specific ArchiveResult by abid, uuid, or pk."""
    return ArchiveResult.objects.get(Q(pk__icontains=archiveresult_id) | Q(abid__icontains=archiveresult_id) | Q(uuid__icontains=archiveresult_id))


# @router.post("/archiveresult", response=ArchiveResultSchema)
# def create_archiveresult(request, payload: ArchiveResultSchema):
#     archiveresult = ArchiveResult.objects.create(**payload.dict())
#     return archiveresult
#
# @router.put("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema)
# def update_archiveresult(request, archiveresult_id: str, payload: ArchiveResultSchema):
#     archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
#
#     for attr, value in payload.dict().items():
#         setattr(archiveresult, attr, value)
#     archiveresult.save()
#
#     return archiveresult
#
# @router.delete("/archiveresult/{archiveresult_id}")
# def delete_archiveresult(request, archiveresult_id: str):
#     archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
#     archiveresult.delete()
#     return {"success": True}





### Snapshot #########################################################################


class SnapshotSchema(Schema):
    abid: str
    uuid: UUID
    pk: str
    modified: datetime
    created: datetime
    created_by_id: str

    url: str
    tags: str
    title: Optional[str]
    timestamp: str
    archive_path: str

    bookmarked: datetime
    added: datetime
    updated: Optional[datetime]

    num_archiveresults: int
    archiveresults: List[ArchiveResultSchema]

    @staticmethod
    def resolve_created_by_id(obj):
        return str(obj.created_by_id)

    @staticmethod
    def resolve_pk(obj):
        return str(obj.pk)

    @staticmethod
    def resolve_uuid(obj):
        return str(obj.uuid)

    @staticmethod
    def resolve_abid(obj):
        return str(obj.ABID)

    @staticmethod
    def resolve_tags(obj):
        return obj.tags_str()

    @staticmethod
    def resolve_num_archiveresults(obj, context):
        return obj.archiveresult_set.all().distinct().count()

    @staticmethod
    def resolve_archiveresults(obj, context):
        if context['request'].with_archiveresults:
            return obj.archiveresult_set.all().distinct()
        return ArchiveResult.objects.none()


class SnapshotFilterSchema(FilterSchema):
    abid: Optional[str] = Field(None, q='abid__icontains')
    uuid: Optional[str] = Field(None, q='uuid__icontains')
    pk: Optional[str] = Field(None, q='pk__icontains')
    created_by_id: str = Field(None, q='created_by_id__icontains')
    created__gte: datetime = Field(None, q='created__gte')
    created__lt: datetime = Field(None, q='created__lt')
    created: datetime = Field(None, q='created')
    modified: datetime = Field(None, q='modified')
    modified__gte: datetime = Field(None, q='modified__gte')
    modified__lt: datetime = Field(None, q='modified__lt')

    search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains', 'abid__icontains', 'uuid__icontains'])
    url: Optional[str] = Field(None, q='url')
    tag: Optional[str] = Field(None, q='tags__name')
    title: Optional[str] = Field(None, q='title__icontains')
    timestamp: Optional[str] = Field(None, q='timestamp__startswith')

    added__gte: Optional[datetime] = Field(None, q='added__gte')
    added__lt: Optional[datetime] = Field(None, q='added__lt')



@router.get("/snapshots", response=List[SnapshotSchema])
@paginate
def list_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool=True):
    """List all Snapshot entries matching these filters."""
    request.with_archiveresults = with_archiveresults

    qs = Snapshot.objects.all()
    results = filters.filter(qs)
    return results

@router.get("/snapshot/{snapshot_id}", response=SnapshotSchema)
def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
    """Get a specific Snapshot by abid, uuid, or pk."""
    request.with_archiveresults = with_archiveresults
    snapshot = None
    try:
        snapshot = Snapshot.objects.get(Q(uuid__startswith=snapshot_id) | Q(abid__startswith=snapshot_id) | Q(pk__startswith=snapshot_id))
    except Snapshot.DoesNotExist:
        pass

    try:
        snapshot = snapshot or Snapshot.objects.get()
    except Snapshot.DoesNotExist:
        pass

    try:
        snapshot = snapshot or Snapshot.objects.get(Q(uuid__icontains=snapshot_id) | Q(abid__icontains=snapshot_id))
    except Snapshot.DoesNotExist:
        pass

    return snapshot


# @router.post("/snapshot", response=SnapshotSchema)
# def create_snapshot(request, payload: SnapshotSchema):
#     snapshot = Snapshot.objects.create(**payload.dict())
#     return snapshot
#
# @router.put("/snapshot/{snapshot_uuid}", response=SnapshotSchema)
# def update_snapshot(request, snapshot_uuid: str, payload: SnapshotSchema):
#     snapshot = get_object_or_404(Snapshot, uuid=snapshot_uuid)
#
#     for attr, value in payload.dict().items():
#         setattr(snapshot, attr, value)
#     snapshot.save()
#
#     return snapshot
#
# @router.delete("/snapshot/{snapshot_uuid}")
# def delete_snapshot(request, snapshot_uuid: str):
#     snapshot = get_object_or_404(Snapshot, uuid=snapshot_uuid)
#     snapshot.delete()
#     return {"success": True}



### Tag #########################################################################


class TagSchema(Schema):
    abid: Optional[UUID] = Field(None, q='abid')
    uuid: Optional[UUID] = Field(None, q='uuid')
    pk: Optional[UUID] = Field(None, q='pk')
    modified: datetime
    created: datetime
    created_by_id: str

    name: str
    slug: str


    @staticmethod
    def resolve_created_by_id(obj):
        return str(obj.created_by_id)

@router.get("/tags", response=List[TagSchema])
def list_tags(request):
    return Tag.objects.all()
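Reading the Snapshot endpoints above, a filtered listing could be fetched as in this sketch; it is illustrative only, the query parameter names map onto SnapshotFilterSchema fields and the with_archiveresults argument of list_snapshots, and authentication is again omitted for the reason noted earlier:

# illustrative only: paginated list of SnapshotSchema objects matching a substring search
import requests

resp = requests.get('http://localhost:8000/api/v1/core/snapshots',
                    params={'search': 'example.com', 'with_archiveresults': 'false'})
print(resp.json())
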
@@ -4,18 +4,14 @@ __command__ = 'archivebox'
import os
import sys
import argparse
import threading
from time import sleep

from typing import Optional, Dict, List, IO, Union, Iterable
from typing import Optional, Dict, List, IO, Union
from pathlib import Path

from ..config import OUTPUT_DIR, check_data_folder, check_migrations, stderr
from ..config import OUTPUT_DIR, check_data_folder, check_migrations

from importlib import import_module

BUILTIN_LIST = list

CLI_DIR = Path(__file__).resolve().parent

# these common commands will appear sorted before any others for ease-of-use
@@ -37,40 +33,6 @@ is_valid_cli_module = lambda module, subcommand: (
)


IGNORED_BG_THREADS = ('MainThread', 'ThreadPoolExecutor', 'IPythonHistorySavingThread')  # threads we dont have to wait for before exiting


def wait_for_bg_threads_to_exit(thread_names: Iterable[str]=(), ignore_names: Iterable[str]=IGNORED_BG_THREADS, timeout: int=60) -> int:
    """
    Block until the specified threads exit. e.g. pass thread_names=('default_hook_handler',) to wait for webhooks.
    Useful for waiting for signal handlers, webhooks, etc. to finish running after a mgmt command completes.
    """

    wait_for_all: bool = thread_names == ()

    thread_matches = lambda thread, ptns: any(ptn in repr(thread) for ptn in ptns)

    should_wait = lambda thread: (
        not thread_matches(thread, ignore_names)
        and (wait_for_all or thread_matches(thread, thread_names)))

    for tries in range(timeout):
        all_threads = [*threading.enumerate()]
        blocking_threads = [*filter(should_wait, all_threads)]
        threads_summary = ', '.join(repr(t) for t in blocking_threads)
        if blocking_threads:
            sleep(1)
            if tries == 5:  # only show stderr message if we need to wait more than 5s
                stderr(
                    f'[…] Waiting up to {timeout}s for background jobs (e.g. webhooks) to finish...',
                    threads_summary,
                )
        else:
            return tries

    raise Exception('Background threads failed to exit after {tries}s: {threads_summary}')


def list_subcommands() -> Dict[str, str]:
    """find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""

@@ -117,9 +79,6 @@ def run_subcommand(subcommand: str,
    module = import_module('.archivebox_{}'.format(subcommand), __package__)
    module.main(args=subcommand_args, stdin=stdin, pwd=pwd)  # type: ignore

    # wait for webhooks, signals, and other background jobs to finish before exit
    wait_for_bg_threads_to_exit(timeout=60)


SUBCOMMANDS = list_subcommands()


@@ -37,7 +37,7 @@ from sqlite3 import dbapi2 as sqlite3
from hashlib import md5
from pathlib import Path
from datetime import datetime, timezone
from typing import Optional, Type, Tuple, Dict, Union, List, Any
from typing import Optional, Type, Tuple, Dict, Union, List
from subprocess import run, PIPE, DEVNULL
from configparser import ConfigParser
from collections import defaultdict

@@ -72,7 +72,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
        'TIMEOUT': {'type': int, 'default': 60},
        'MEDIA_TIMEOUT': {'type': int, 'default': 3600},
        'OUTPUT_PERMISSIONS': {'type': str, 'default': '644'},
        'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'},  # TODO: move this to be a default WGET_ARGS
        'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'},

        'URL_DENYLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$', 'aliases': ('URL_BLACKLIST',)},  # to avoid downloading code assets as their own pages
        'URL_ALLOWLIST': {'type': str, 'default': None, 'aliases': ('URL_WHITELIST',)},

@@ -112,7 +112,6 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
        'LDAP_FIRSTNAME_ATTR': {'type': str, 'default': None},
        'LDAP_LASTNAME_ATTR': {'type': str, 'default': None},
        'LDAP_EMAIL_ATTR': {'type': str, 'default': None},
        'LDAP_CREATE_SUPERUSER': {'type': bool, 'default': False},
    },

    'ARCHIVE_METHOD_TOGGLES': {

@@ -137,15 +136,14 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
    },

    'ARCHIVE_METHOD_OPTIONS': {
        'RESOLUTION': {'type': str, 'default': '1440,2000', 'aliases': ('SCREENSHOT_RESOLUTION','WINDOW_SIZE')},
        'GIT_DOMAINS': {'type': str, 'default': 'github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht'},
        'RESOLUTION': {'type': str, 'default': '1440,2000', 'aliases': ('SCREENSHOT_RESOLUTION',)},
        'GIT_DOMAINS': {'type': str, 'default': 'github.com,bitbucket.org,gitlab.com,gist.github.com'},
        'CHECK_SSL_VALIDITY': {'type': bool, 'default': True},
        'MEDIA_MAX_SIZE': {'type': str, 'default': '750m'},

        'USER_AGENT': {'type': str, 'default': None},
        'CURL_USER_AGENT': {'type': str, 'default': lambda c: c['USER_AGENT'] or 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) curl/{CURL_VERSION}'},
        'WGET_USER_AGENT': {'type': str, 'default': lambda c: c['USER_AGENT'] or 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) wget/{WGET_VERSION}'},
        'CHROME_USER_AGENT': {'type': str, 'default': lambda c: c['USER_AGENT'] or 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)'},
        'CURL_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) curl/{CURL_VERSION}'},
        'WGET_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) wget/{WGET_VERSION}'},
        'CHROME_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)'},

        'COOKIES_FILE': {'type': str, 'default': None},
        'CHROME_USER_DATA_DIR': {'type': str, 'default': None},

@@ -153,11 +151,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
        'CHROME_TIMEOUT': {'type': int, 'default': 0},
        'CHROME_HEADLESS': {'type': bool, 'default': True},
        'CHROME_SANDBOX': {'type': bool, 'default': lambda c: not c['IN_DOCKER']},
        'CHROME_EXTRA_ARGS': {'type': list, 'default': None},

        'YOUTUBEDL_ARGS': {'type': list, 'default': lambda c: [
            '--restrict-filenames',
            '--trim-filenames', '128',
            '--write-description',
            '--write-info-json',
            '--write-annotations',

@@ -179,7 +173,6 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
            '--add-metadata',
            '--format=(bv*+ba/b)[filesize<={}][filesize_approx<=?{}]/(bv*+ba/b)'.format(c['MEDIA_MAX_SIZE'], c['MEDIA_MAX_SIZE']),
        ]},
        'YOUTUBEDL_EXTRA_ARGS': {'type': list, 'default': None},


        'WGET_ARGS': {'type': list, 'default': ['--no-verbose',

@@ -191,17 +184,12 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
            '--no-parent',
            '-e', 'robots=off',
        ]},
        'WGET_EXTRA_ARGS': {'type': list, 'default': None},
        'CURL_ARGS': {'type': list, 'default': ['--silent',
            '--location',
            '--compressed'
        ]},
        'CURL_EXTRA_ARGS': {'type': list, 'default': None},
        'GIT_ARGS': {'type': list, 'default': ['--recursive']},
        'SINGLEFILE_ARGS': {'type': list, 'default': None},
        'SINGLEFILE_EXTRA_ARGS': {'type': list, 'default': None},
        'MERCURY_ARGS': {'type': list, 'default': ['--format=text']},
        'MERCURY_EXTRA_ARGS': {'type': list, 'default': None},
        'SINGLEFILE_ARGS': {'type': list, 'default' : None},
        'FAVICON_PROVIDER': {'type': str, 'default': 'https://www.google.com/s2/favicons?domain={}'},
    },


@@ -265,7 +253,7 @@ CONFIG_ALIASES = {
    for key, default in section.items()
    for alias in default.get('aliases', ())
}
USER_CONFIG = {key: section[key] for section in CONFIG_SCHEMA.values() for key in section.keys()}
USER_CONFIG = {key for section in CONFIG_SCHEMA.values() for key in section.keys()}

def get_real_name(key: str) -> str:
    """get the current canonical name for a given deprecated config key"""

@@ -281,9 +269,6 @@ TEMPLATES_DIR_NAME = 'templates'
ARCHIVE_DIR_NAME = 'archive'
SOURCES_DIR_NAME = 'sources'
LOGS_DIR_NAME = 'logs'
CACHE_DIR_NAME = 'cache'
PERSONAS_DIR_NAME = 'personas'
CRONTABS_DIR_NAME = 'crontabs'
SQL_INDEX_FILENAME = 'index.sqlite3'
JSON_INDEX_FILENAME = 'index.json'
HTML_INDEX_FILENAME = 'index.html'

@@ -357,12 +342,9 @@ ALLOWED_IN_OUTPUT_DIR = {
    'static',
    'sonic',
    'search.sqlite3',
    CRONTABS_DIR_NAME,
    ARCHIVE_DIR_NAME,
    SOURCES_DIR_NAME,
    LOGS_DIR_NAME,
    CACHE_DIR_NAME,
    PERSONAS_DIR_NAME,
    SQL_INDEX_FILENAME,
    f'{SQL_INDEX_FILENAME}-wal',
    f'{SQL_INDEX_FILENAME}-shm',

@@ -381,32 +363,24 @@ ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE

############################## Version Config ##################################

def get_system_user() -> str:
    # some host OS's are unable to provide a username (k3s, Windows), making this complicated
    # uid 999 is especially problematic and breaks many attempts
    SYSTEM_USER = None
    FALLBACK_USER_PLACHOLDER = f'user_{os.getuid()}'

    # Option 1
def get_system_user():
    SYSTEM_USER = getpass.getuser() or os.getlogin()
    try:
        import pwd
        SYSTEM_USER = SYSTEM_USER or pwd.getpwuid(os.geteuid()).pw_name
    except (ModuleNotFoundError, Exception):
        return pwd.getpwuid(os.geteuid()).pw_name or SYSTEM_USER
    except KeyError:
        # Process' UID might not map to a user in cases such as running the Docker image
        # (where `archivebox` is 999) as a different UID.
        pass
    except ModuleNotFoundError:
        # pwd doesn't exist on windows
        pass

    # Option 2
    try:
        SYSTEM_USER = SYSTEM_USER or getpass.getuser()
    except Exception:
        # this should never happen, uncomment to debug
        # raise
        pass

    # Option 3
    try:
        SYSTEM_USER = SYSTEM_USER or os.getlogin()
    except Exception:
        pass

    return SYSTEM_USER or FALLBACK_USER_PLACHOLDER
    return SYSTEM_USER

def get_version(config):
    try:

@@ -513,11 +487,9 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
    'ARCHIVE_DIR': {'default': lambda c: c['OUTPUT_DIR'] / ARCHIVE_DIR_NAME},
    'SOURCES_DIR': {'default': lambda c: c['OUTPUT_DIR'] / SOURCES_DIR_NAME},
    'LOGS_DIR': {'default': lambda c: c['OUTPUT_DIR'] / LOGS_DIR_NAME},
    'CACHE_DIR': {'default': lambda c: c['OUTPUT_DIR'] / CACHE_DIR_NAME},
    'PERSONAS_DIR': {'default': lambda c: c['OUTPUT_DIR'] / PERSONAS_DIR_NAME},
    'CONFIG_FILE': {'default': lambda c: Path(c['CONFIG_FILE']).resolve() if c['CONFIG_FILE'] else c['OUTPUT_DIR'] / CONFIG_FILENAME},
    'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and Path(c['COOKIES_FILE']).resolve()},
    'CHROME_USER_DATA_DIR': {'default': lambda c: Path(c['CHROME_USER_DATA_DIR']).resolve() if c['CHROME_USER_DATA_DIR'] else None},
    'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (Path(c['CHROME_USER_DATA_DIR']).resolve() if c['CHROME_USER_DATA_DIR'] else None)},  # None means unset, so we autodetect it with find_chrome_Data_dir(), but emptystring '' means user manually set it to '', and we should store it as None
    'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)},
    'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)},
    'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')},  # exec is always needed to list directories

@@ -547,7 +519,6 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
    'CURL_VERSION': {'default': lambda c: bin_version(c['CURL_BINARY']) if c['USE_CURL'] else None},
    'CURL_USER_AGENT': {'default': lambda c: c['CURL_USER_AGENT'].format(**c)},
    'CURL_ARGS': {'default': lambda c: c['CURL_ARGS'] or []},
    'CURL_EXTRA_ARGS': {'default': lambda c: c['CURL_EXTRA_ARGS'] or []},
    'SAVE_FAVICON': {'default': lambda c: c['USE_CURL'] and c['SAVE_FAVICON']},
    'SAVE_ARCHIVE_DOT_ORG': {'default': lambda c: c['USE_CURL'] and c['SAVE_ARCHIVE_DOT_ORG']},

@@ -558,22 +529,18 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
    'SAVE_WGET': {'default': lambda c: c['USE_WGET'] and c['SAVE_WGET']},
    'SAVE_WARC': {'default': lambda c: c['USE_WGET'] and c['SAVE_WARC']},
    'WGET_ARGS': {'default': lambda c: c['WGET_ARGS'] or []},
    'WGET_EXTRA_ARGS': {'default': lambda c: c['WGET_EXTRA_ARGS'] or []},

    'RIPGREP_VERSION': {'default': lambda c: bin_version(c['RIPGREP_BINARY']) if c['USE_RIPGREP'] else None},

    'USE_SINGLEFILE': {'default': lambda c: c['USE_SINGLEFILE'] and c['SAVE_SINGLEFILE']},
    'SINGLEFILE_VERSION': {'default': lambda c: bin_version(c['SINGLEFILE_BINARY']) if c['USE_SINGLEFILE'] else None},
    'SINGLEFILE_ARGS': {'default': lambda c: c['SINGLEFILE_ARGS'] or []},
    'SINGLEFILE_EXTRA_ARGS': {'default': lambda c: c['SINGLEFILE_EXTRA_ARGS'] or []},

    'USE_READABILITY': {'default': lambda c: c['USE_READABILITY'] and c['SAVE_READABILITY']},
    'READABILITY_VERSION': {'default': lambda c: bin_version(c['READABILITY_BINARY']) if c['USE_READABILITY'] else None},

    'USE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['SAVE_MERCURY']},
    'MERCURY_VERSION': {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None},  # mercury doesnt expose version info until this is merged https://github.com/postlight/parser/pull/750
    'MERCURY_ARGS': {'default': lambda c: c['MERCURY_ARGS'] or []},
    'MERCURY_EXTRA_ARGS': {'default': lambda c: c['MERCURY_EXTRA_ARGS'] or []},

    'USE_GIT': {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']},
    'GIT_VERSION': {'default': lambda c: bin_version(c['GIT_BINARY']) if c['USE_GIT'] else None},

@@ -583,7 +550,6 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
    'YOUTUBEDL_VERSION': {'default': lambda c: bin_version(c['YOUTUBEDL_BINARY']) if c['USE_YOUTUBEDL'] else None},
    'SAVE_MEDIA': {'default': lambda c: c['USE_YOUTUBEDL'] and c['SAVE_MEDIA']},
    'YOUTUBEDL_ARGS': {'default': lambda c: c['YOUTUBEDL_ARGS'] or []},
    'YOUTUBEDL_EXTRA_ARGS': {'default': lambda c: c['YOUTUBEDL_EXTRA_ARGS'] or []},

    'CHROME_BINARY': {'default': lambda c: c['CHROME_BINARY'] or find_chrome_binary()},
    'USE_CHROME': {'default': lambda c: c['USE_CHROME'] and c['CHROME_BINARY'] and (c['SAVE_PDF'] or c['SAVE_SCREENSHOT'] or c['SAVE_DOM'] or c['SAVE_SINGLEFILE'])},

@@ -602,9 +568,9 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {

    'DEPENDENCIES': {'default': lambda c: get_dependency_info(c)},
    'CODE_LOCATIONS': {'default': lambda c: get_code_locations(c)},
    'EXTERNAL_LOCATIONS': {'default': lambda c: get_external_locations(c)},
    'DATA_LOCATIONS': {'default': lambda c: get_data_locations(c)},
    'CHROME_OPTIONS': {'default': lambda c: get_chrome_info(c)},
    'CHROME_EXTRA_ARGS': {'default': lambda c: c['CHROME_EXTRA_ARGS'] or []},
    'SAVE_ALLOWLIST_PTN': {'default': lambda c: c['SAVE_ALLOWLIST'] and {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_ALLOWLIST'].items()}},
    'SAVE_DENYLIST_PTN': {'default': lambda c: c['SAVE_DENYLIST'] and {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_DENYLIST'].items()}},
}

@@ -933,36 +899,27 @@ def find_chrome_binary() -> Optional[str]:

def find_chrome_data_dir() -> Optional[str]:
    """find any installed chrome user data directories in the default locations"""
    # deprecated because this is DANGEROUS, do not re-implement/uncomment this behavior.

    # Going forward we want to discourage people from using their main chrome profile for archiving.
    # Session tokens, personal data, and cookies are often returned in server responses,
    # when they get archived, they are essentially burned as anyone who can view the archive
    # can use that data to masquerade as the logged-in user that did the archiving.
    # For this reason users should always create dedicated burner profiles for archiving and not use
    # their daily driver main accounts.

    # # Precedence: Chromium, Chrome, Beta, Canary, Unstable, Dev
    # # make sure data dir finding precedence order always matches binary finding order
    # default_profile_paths = (
    #     '~/.config/chromium',
    #     '~/Library/Application Support/Chromium',
    #     '~/AppData/Local/Chromium/User Data',
    #     '~/.config/chrome',
    #     '~/.config/google-chrome',
    #     '~/Library/Application Support/Google/Chrome',
    #     '~/AppData/Local/Google/Chrome/User Data',
    #     '~/.config/google-chrome-stable',
    #     '~/.config/google-chrome-beta',
    #     '~/Library/Application Support/Google/Chrome Canary',
    #     '~/AppData/Local/Google/Chrome SxS/User Data',
    #     '~/.config/google-chrome-unstable',
    #     '~/.config/google-chrome-dev',
    # )
    # for path in default_profile_paths:
    #     full_path = Path(path).resolve()
    #     if full_path.exists():
    #         return full_path
    # Precedence: Chromium, Chrome, Beta, Canary, Unstable, Dev
    # make sure data dir finding precedence order always matches binary finding order
    default_profile_paths = (
        '~/.config/chromium',
        '~/Library/Application Support/Chromium',
        '~/AppData/Local/Chromium/User Data',
        '~/.config/chrome',
        '~/.config/google-chrome',
        '~/Library/Application Support/Google/Chrome',
        '~/AppData/Local/Google/Chrome/User Data',
        '~/.config/google-chrome-stable',
        '~/.config/google-chrome-beta',
        '~/Library/Application Support/Google/Chrome Canary',
        '~/AppData/Local/Google/Chrome SxS/User Data',
        '~/.config/google-chrome-unstable',
        '~/.config/google-chrome-dev',
    )
    for path in default_profile_paths:
        full_path = Path(path).resolve()
        if full_path.exists():
            return full_path
    return None

def wget_supports_compression(config):

@@ -988,6 +945,11 @@ def get_code_locations(config: ConfigDict) -> SimpleConfigValueDict:
            'enabled': True,
            'is_valid': (config['TEMPLATES_DIR'] / 'static').exists(),
        },
        'CUSTOM_TEMPLATES_DIR': {
            'path': config['CUSTOM_TEMPLATES_DIR'] and Path(config['CUSTOM_TEMPLATES_DIR']).resolve(),
            'enabled': bool(config['CUSTOM_TEMPLATES_DIR']),
            'is_valid': config['CUSTOM_TEMPLATES_DIR'] and Path(config['CUSTOM_TEMPLATES_DIR']).exists(),
        },
        # 'NODE_MODULES_DIR': {
        #     'path': ,
        #     'enabled': ,

@@ -995,25 +957,45 @@ def get_code_locations(config: ConfigDict) -> SimpleConfigValueDict:
        # },
    }

def get_external_locations(config: ConfigDict) -> ConfigValue:
    abspath = lambda path: None if path is None else Path(path).resolve()
    return {
        'CHROME_USER_DATA_DIR': {
            'path': abspath(config['CHROME_USER_DATA_DIR']),
            'enabled': config['USE_CHROME'] and config['CHROME_USER_DATA_DIR'],
            'is_valid': False if config['CHROME_USER_DATA_DIR'] is None else (Path(config['CHROME_USER_DATA_DIR']) / 'Default').exists(),
        },
        'COOKIES_FILE': {
            'path': abspath(config['COOKIES_FILE']),
            'enabled': config['USE_WGET'] and config['COOKIES_FILE'],
            'is_valid': False if config['COOKIES_FILE'] is None else Path(config['COOKIES_FILE']).exists(),
        },
    }

def get_data_locations(config: ConfigDict) -> ConfigValue:
    return {
        # OLD: migrating to personas
        # 'CHROME_USER_DATA_DIR': {
        #     'path': os.path.abspath(config['CHROME_USER_DATA_DIR']),
        #     'enabled': config['USE_CHROME'] and config['CHROME_USER_DATA_DIR'],
        #     'is_valid': False if config['CHROME_USER_DATA_DIR'] is None else (Path(config['CHROME_USER_DATA_DIR']) / 'Default').exists(),
        # },
        # 'COOKIES_FILE': {
        #     'path': os.path.abspath(config['COOKIES_FILE']),
        #     'enabled': config['USE_WGET'] and config['COOKIES_FILE'],
        #     'is_valid': False if config['COOKIES_FILE'] is None else Path(config['COOKIES_FILE']).exists(),
        # },
        'OUTPUT_DIR': {
            'path': config['OUTPUT_DIR'].resolve(),
            'enabled': True,
            'is_valid': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).exists(),
            'is_mount': os.path.ismount(config['OUTPUT_DIR'].resolve()),
        },
        'SOURCES_DIR': {
            'path': config['SOURCES_DIR'].resolve(),
            'enabled': True,
            'is_valid': config['SOURCES_DIR'].exists(),
        },
        'LOGS_DIR': {
            'path': config['LOGS_DIR'].resolve(),
            'enabled': True,
            'is_valid': config['LOGS_DIR'].exists(),
        },
        'ARCHIVE_DIR': {
            'path': config['ARCHIVE_DIR'].resolve(),
            'enabled': True,
            'is_valid': config['ARCHIVE_DIR'].exists(),
            'is_mount': os.path.ismount(config['ARCHIVE_DIR'].resolve()),
        },
        'CONFIG_FILE': {
            'path': config['CONFIG_FILE'].resolve(),
            'enabled': True,

@@ -1025,43 +1007,6 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
            'is_valid': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).exists(),
            'is_mount': os.path.ismount((config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).resolve()),
        },
        'ARCHIVE_DIR': {
            'path': config['ARCHIVE_DIR'].resolve(),
            'enabled': True,
            'is_valid': config['ARCHIVE_DIR'].exists(),
            'is_mount': os.path.ismount(config['ARCHIVE_DIR'].resolve()),
        },
        'SOURCES_DIR': {
            'path': config['SOURCES_DIR'].resolve(),
            'enabled': True,
            'is_valid': config['SOURCES_DIR'].exists(),
        },
        'LOGS_DIR': {
            'path': config['LOGS_DIR'].resolve(),
            'enabled': True,
            'is_valid': config['LOGS_DIR'].exists(),
        },
        'CACHE_DIR': {
            'path': config['CACHE_DIR'].resolve(),
            'enabled': True,
            'is_valid': config['CACHE_DIR'].exists(),
        },
        'CUSTOM_TEMPLATES_DIR': {
            'path': config['CUSTOM_TEMPLATES_DIR'] and Path(config['CUSTOM_TEMPLATES_DIR']).resolve(),
            'enabled': bool(config['CUSTOM_TEMPLATES_DIR']),
            'is_valid': config['CUSTOM_TEMPLATES_DIR'] and Path(config['CUSTOM_TEMPLATES_DIR']).exists(),
        },
        'PERSONAS_DIR': {
            'path': config['PERSONAS_DIR'].resolve(),
            'enabled': True,
            'is_valid': config['PERSONAS_DIR'].exists(),
        },
        # managed by bin/docker_entrypoint.sh and python-crontab:
        # 'CRONTABS_DIR': {
        #     'path': config['CRONTABS_DIR'].resolve(),
        #     'enabled': True,
        #     'is_valid': config['CRONTABS_DIR'].exists(),
        # },
    }

def get_dependency_info(config: ConfigDict) -> ConfigValue:

@@ -1296,7 +1241,7 @@ def check_system_config(config: ConfigDict=CONFIG) -> None:

    # stderr('[i] Using Chrome binary: {}'.format(shutil.which(CHROME_BINARY) or CHROME_BINARY))
    # stderr('[i] Using Chrome data dir: {}'.format(os.path.abspath(CHROME_USER_DATA_DIR)))
    if config['CHROME_USER_DATA_DIR'] is not None and Path(config['CHROME_USER_DATA_DIR']).exists():
    if config['CHROME_USER_DATA_DIR'] is not None:
        if not (Path(config['CHROME_USER_DATA_DIR']) / 'Default').exists():
            stderr('[X] Could not find profile "Default" in CHROME_USER_DATA_DIR.', color='red')
            stderr(f' {config["CHROME_USER_DATA_DIR"]}')

@@ -1306,13 +1251,8 @@ def check_system_config(config: ConfigDict=CONFIG) -> None:
            if '/Default' in str(config['CHROME_USER_DATA_DIR']):
                stderr()
                stderr(' Try removing /Default from the end e.g.:')
                stderr(' CHROME_USER_DATA_DIR="{}"'.format(str(config['CHROME_USER_DATA_DIR']).split('/Default')[0]))

                # hard error is too annoying here, instead just set it to nothing
                # raise SystemExit(2)
                config['CHROME_USER_DATA_DIR'] = None
            else:
                config['CHROME_USER_DATA_DIR'] = None
                stderr(' CHROME_USER_DATA_DIR="{}"'.format(config['CHROME_USER_DATA_DIR'].split('/Default')[0]))
            raise SystemExit(2)


def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:

@@ -1381,7 +1321,6 @@ def check_data_folder(out_dir: Union[str, Path, None]=None, config: ConfigDict=C
        stderr(' archivebox init')
        raise SystemExit(2)


def check_migrations(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG):
    output_dir = out_dir or config['OUTPUT_DIR']
    from .index.sql import list_migrations

@@ -1398,9 +1337,6 @@ def check_migrations(out_dir: Union[str, Path, None]=None, config: ConfigDict=CO

    (Path(output_dir) / SOURCES_DIR_NAME).mkdir(exist_ok=True)
    (Path(output_dir) / LOGS_DIR_NAME).mkdir(exist_ok=True)
    (Path(output_dir) / CACHE_DIR_NAME).mkdir(exist_ok=True)
    (Path(output_dir) / PERSONAS_DIR_NAME).mkdir(exist_ok=True)
    (Path(output_dir) / PERSONAS_DIR_NAME / 'Default').mkdir(exist_ok=True)


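The config hunks above lean on callable defaults that are resolved against the assembled config dict (for example CURL_USER_AGENT falling back to USER_AGENT, and the *_ARGS entries falling back to empty lists); the following is only a minimal standalone sketch of that pattern, with made-up placeholder values rather than ArchiveBox's real resolution code:

# minimal sketch of the callable-default pattern used by CONFIG_SCHEMA / DYNAMIC_CONFIG_SCHEMA above
schema = {
    'USER_AGENT':      {'type': str,  'default': None},
    'CURL_USER_AGENT': {'type': str,  'default': lambda c: c['USER_AGENT'] or 'ArchiveBox/{VERSION} curl'},
    'CURL_ARGS':       {'type': list, 'default': lambda c: c['CURL_ARGS'] or []},
}

config = {'USER_AGENT': None, 'CURL_ARGS': None, 'VERSION': '0.7.2'}
for key, entry in schema.items():
    default = entry['default']
    config[key] = default(config) if callable(default) else config.get(key) or default

# '{VERSION}' placeholders get filled later via .format(**config), as in the CURL_USER_AGENT dynamic entry
config['CURL_USER_AGENT'] = config['CURL_USER_AGENT'].format(**config)
print(config['CURL_USER_AGENT'], config['CURL_ARGS'])
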
@@ -1,2 +1 @@
__package__ = 'archivebox.core'

@ -6,7 +6,6 @@ from contextlib import redirect_stdout
|
|||
from datetime import datetime, timezone
|
||||
|
||||
from django.contrib import admin
|
||||
from django.db.models import Count
|
||||
from django.urls import path
|
||||
from django.utils.html import format_html
|
||||
from django.utils.safestring import mark_safe
|
||||
|
@ -14,32 +13,18 @@ from django.shortcuts import render, redirect
|
|||
from django.contrib.auth import get_user_model
|
||||
from django import forms
|
||||
|
||||
|
||||
from signal_webhooks.admin import WebhookAdmin, get_webhook_model
|
||||
# from plugantic.admin import CustomPlugin
|
||||
|
||||
from ..util import htmldecode, urldecode, ansi_to_html
|
||||
|
||||
from core.models import Snapshot, ArchiveResult, Tag
|
||||
from core.forms import AddLinkForm
|
||||
|
||||
from core.mixins import SearchResultsAdminMixin
|
||||
from api.models import APIToken
|
||||
|
||||
from index.html import snapshot_icons
|
||||
from logging_util import printable_filesize
|
||||
from main import add, remove
|
||||
from config import OUTPUT_DIR, SNAPSHOTS_PER_PAGE
|
||||
from extractors import archive_links
|
||||
from config import (
|
||||
OUTPUT_DIR,
|
||||
SNAPSHOTS_PER_PAGE,
|
||||
VERSION,
|
||||
VERSIONS_AVAILABLE,
|
||||
CAN_UPGRADE
|
||||
)
|
||||
|
||||
|
||||
GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': VERSIONS_AVAILABLE, 'CAN_UPGRADE': CAN_UPGRADE}
|
||||
|
||||
# Admin URLs
|
||||
# /admin/
|
||||
|
@ -54,11 +39,343 @@ GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': VERSIONS_AVAILABLE,
|
|||
# TODO: https://stackoverflow.com/questions/40760880/add-custom-button-to-django-admin-panel
|
||||
|
||||
|
||||
class ArchiveResultInline(admin.TabularInline):
|
||||
model = ArchiveResult
|
||||
|
||||
class TagInline(admin.TabularInline):
|
||||
model = Snapshot.tags.through
|
||||
|
||||
from django.contrib.admin.helpers import ActionForm
|
||||
from django.contrib.admin.widgets import AutocompleteSelectMultiple
|
||||
|
||||
# WIP: broken by Django 3.1.2 -> 4.0 migration
|
||||
class AutocompleteTags:
|
||||
model = Tag
|
||||
search_fields = ['name']
|
||||
name = 'tags'
|
||||
|
||||
class AutocompleteTagsAdminStub:
|
||||
name = 'admin'
|
||||
|
||||
|
||||
class SnapshotActionForm(ActionForm):
|
||||
tags = forms.ModelMultipleChoiceField(
|
||||
queryset=Tag.objects.all(),
|
||||
required=False,
|
||||
# WIP: broken by Django 3.1.2 -> 4.0 migration
|
||||
widget=AutocompleteSelectMultiple(
|
||||
AutocompleteTags(),
|
||||
AutocompleteTagsAdminStub(),
|
||||
),
|
||||
)
|
||||
|
||||
# TODO: allow selecting actions for specific extractors? is this useful?
|
||||
# EXTRACTOR_CHOICES = [
|
||||
# (name, name.title())
|
||||
# for name, _, _ in get_default_archive_methods()
|
||||
# ]
|
||||
# extractor = forms.ChoiceField(
|
||||
# choices=EXTRACTOR_CHOICES,
|
||||
# required=False,
|
||||
# widget=forms.MultipleChoiceField(attrs={'class': "form-control"})
|
||||
# )
|
||||
|
||||
|
||||
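The SnapshotActionForm above replaces Django's default action form so a tags selector is rendered next to the action dropdown. A hedged sketch of how the posted values are then read inside an admin action handler on the SnapshotAdmin below; it mirrors the add_tags/remove_tags actions defined later in this file rather than adding new behavior.

# Sketch: values submitted through the extra ActionForm field arrive in request.POST,
# keyed by the field name ('tags'), alongside the normal admin action parameters.
def add_tags(self, request, queryset):
    tag_ids = request.POST.getlist('tags')   # primary keys posted by SnapshotActionForm
    for snapshot in queryset:
        snapshot.tags.add(*tag_ids)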
class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
|
||||
list_display = ('added', 'title_str', 'files', 'size', 'url_str')
|
||||
sort_fields = ('title_str', 'url_str', 'added', 'files')
|
||||
readonly_fields = ('info', 'bookmarked', 'added', 'updated')
|
||||
search_fields = ('id', 'url', 'timestamp', 'title', 'tags__name')
|
||||
fields = ('timestamp', 'url', 'title', 'tags', *readonly_fields)
|
||||
list_filter = ('added', 'updated', 'tags', 'archiveresult__status')
|
||||
ordering = ['-added']
|
||||
actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
|
||||
autocomplete_fields = ['tags']
|
||||
inlines = [ArchiveResultInline]
|
||||
list_per_page = SNAPSHOTS_PER_PAGE
|
||||
|
||||
action_form = SnapshotActionForm
|
||||
|
||||
def get_urls(self):
|
||||
urls = super().get_urls()
|
||||
custom_urls = [
|
||||
path('grid/', self.admin_site.admin_view(self.grid_view), name='grid')
|
||||
]
|
||||
return custom_urls + urls
|
||||
|
||||
def get_queryset(self, request):
|
||||
self.request = request
|
||||
return super().get_queryset(request).prefetch_related('tags')
|
||||
|
||||
def tag_list(self, obj):
|
||||
return ', '.join(obj.tags.values_list('name', flat=True))
|
||||
|
||||
# TODO: figure out a different way to do this, you can't nest forms so this doesn't work
|
||||
# def action(self, obj):
|
||||
# # csrfmiddlewaretoken: Wa8UcQ4fD3FJibzxqHN3IYrrjLo4VguWynmbzzcPYoebfVUnDovon7GEMYFRgsh0
|
||||
# # action: update_snapshots
|
||||
# # select_across: 0
|
||||
# # _selected_action: 76d29b26-2a88-439e-877c-a7cca1b72bb3
|
||||
# return format_html(
|
||||
# '''
|
||||
# <form action="/admin/core/snapshot/" method="post" onsubmit="e => e.stopPropagation()">
|
||||
# <input type="hidden" name="csrfmiddlewaretoken" value="{}">
|
||||
# <input type="hidden" name="_selected_action" value="{}">
|
||||
# <button name="update_snapshots">Check</button>
|
||||
# <button name="update_titles">Pull title + favicon</button>
|
||||
# <button name="update_snapshots">Update</button>
|
||||
# <button name="overwrite_snapshots">Re-Archive (overwrite)</button>
|
||||
# <button name="delete_snapshots">Permanently delete</button>
|
||||
# </form>
|
||||
# ''',
|
||||
# csrf.get_token(self.request),
|
||||
# obj.id,
|
||||
# )
|
||||
|
||||
def info(self, obj):
|
||||
return format_html(
|
||||
'''
|
||||
UUID: <code style="font-size: 10px; user-select: all">{}</code>
|
||||
Timestamp: <code style="font-size: 10px; user-select: all">{}</code>
|
||||
URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
|
||||
Archived: {} ({} files {})
|
||||
Favicon: <img src="{}" style="height: 20px"/>
|
||||
Status code: {}
|
||||
Server: {}
|
||||
Content type: {}
|
||||
Extension: {}
|
||||
<br/><br/>
|
||||
<a href="/archive/{}">View Snapshot index ➡️</a>
|
||||
<a href="/admin/core/snapshot/?id__exact={}">View actions ⚙️</a>
|
||||
''',
|
||||
obj.id,
|
||||
obj.timestamp,
|
||||
obj.url_hash,
|
||||
'✅' if obj.is_archived else '❌',
|
||||
obj.num_outputs,
|
||||
self.size(obj),
|
||||
f'/archive/{obj.timestamp}/favicon.ico',
|
||||
obj.status_code or '?',
|
||||
obj.headers and obj.headers.get('Server') or '?',
|
||||
obj.headers and obj.headers.get('Content-Type') or '?',
|
||||
obj.extension or '?',
|
||||
obj.timestamp,
|
||||
obj.id,
|
||||
)
|
||||
|
||||
def title_str(self, obj):
|
||||
canon = obj.as_link().canonical_outputs()
|
||||
tags = ''.join(
|
||||
format_html('<a href="/admin/core/snapshot/?tags__id__exact={}"><span class="tag">{}</span></a> ', tag.id, tag)
|
||||
for tag in obj.tags.all()
|
||||
if str(tag).strip()
|
||||
)
|
||||
return format_html(
|
||||
'<a href="/{}">'
|
||||
'<img src="/{}/{}" class="favicon" onerror="this.remove()">'
|
||||
'</a>'
|
||||
'<a href="/{}/index.html">'
|
||||
'<b class="status-{}">{}</b>'
|
||||
'</a>',
|
||||
obj.archive_path,
|
||||
obj.archive_path, canon['favicon_path'],
|
||||
obj.archive_path,
|
||||
'fetched' if obj.latest_title or obj.title else 'pending',
|
||||
urldecode(htmldecode(obj.latest_title or obj.title or ''))[:128] or 'Pending...'
|
||||
) + mark_safe(f' <span class="tags">{tags}</span>')
|
||||
|
||||
def files(self, obj):
|
||||
return snapshot_icons(obj)
|
||||
|
||||
files.admin_order_field = 'updated'
|
||||
files.short_description = 'Files Saved'
|
||||
|
||||
def size(self, obj):
|
||||
archive_size = (Path(obj.link_dir) / 'index.html').exists() and obj.archive_size
|
||||
if archive_size:
|
||||
size_txt = printable_filesize(archive_size)
|
||||
if archive_size > 52428800:
|
||||
size_txt = mark_safe(f'<b>{size_txt}</b>')
|
||||
else:
|
||||
size_txt = mark_safe('<span style="opacity: 0.3">...</span>')
|
||||
return format_html(
|
||||
'<a href="/{}" title="View all files">{}</a>',
|
||||
obj.archive_path,
|
||||
size_txt,
|
||||
)
|
||||
|
||||
size.admin_order_field = 'archiveresult__count'
|
||||
|
||||
def url_str(self, obj):
|
||||
return format_html(
|
||||
'<a href="{}"><code style="user-select: all;">{}</code></a>',
|
||||
obj.url,
|
||||
obj.url,
|
||||
)
|
||||
|
||||
def grid_view(self, request, extra_context=None):
|
||||
|
||||
# cl = self.get_changelist_instance(request)
|
||||
|
||||
# Save before monkey patching to restore for changelist list view
|
||||
saved_change_list_template = self.change_list_template
|
||||
saved_list_per_page = self.list_per_page
|
||||
saved_list_max_show_all = self.list_max_show_all
|
||||
|
||||
# Monkey patch here plus core_tags.py
|
||||
self.change_list_template = 'private_index_grid.html'
|
||||
self.list_per_page = SNAPSHOTS_PER_PAGE
|
||||
self.list_max_show_all = self.list_per_page
|
||||
|
||||
# Call monkey patched view
|
||||
rendered_response = self.changelist_view(request, extra_context=extra_context)
|
||||
|
||||
# Restore values
|
||||
self.change_list_template = saved_change_list_template
|
||||
self.list_per_page = saved_list_per_page
|
||||
self.list_max_show_all = saved_list_max_show_all
|
||||
|
||||
return rendered_response
|
||||
|
||||
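The grid_view above follows a save/patch/render/restore pattern on the ModelAdmin's class-level attributes. A small illustrative sketch (not part of the original code) of the same idea packaged as a reusable context manager:

from contextlib import contextmanager

@contextmanager
def patched_attrs(obj, **overrides):
    # temporarily override attributes on obj, restoring the originals afterwards
    saved = {name: getattr(obj, name) for name in overrides}
    try:
        for name, value in overrides.items():
            setattr(obj, name, value)
        yield obj
    finally:
        for name, value in saved.items():
            setattr(obj, name, value)

# usage sketch inside grid_view:
# with patched_attrs(self, change_list_template='private_index_grid.html',
#                    list_per_page=SNAPSHOTS_PER_PAGE):
#     return self.changelist_view(request, extra_context=extra_context)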
# for debugging, uncomment this to print all requests:
|
||||
# def changelist_view(self, request, extra_context=None):
|
||||
# print('[*] Got request', request.method, request.POST)
|
||||
# return super().changelist_view(request, extra_context=None)
|
||||
|
||||
def update_snapshots(self, request, queryset):
|
||||
archive_links([
|
||||
snapshot.as_link()
|
||||
for snapshot in queryset
|
||||
], out_dir=OUTPUT_DIR)
|
||||
update_snapshots.short_description = "Pull"
|
||||
|
||||
def update_titles(self, request, queryset):
|
||||
archive_links([
|
||||
snapshot.as_link()
|
||||
for snapshot in queryset
|
||||
], overwrite=True, methods=('title','favicon'), out_dir=OUTPUT_DIR)
|
||||
update_titles.short_description = "⬇️ Title"
|
||||
|
||||
def resnapshot_snapshot(self, request, queryset):
|
||||
for snapshot in queryset:
|
||||
timestamp = datetime.now(timezone.utc).isoformat('T', 'seconds')
|
||||
new_url = snapshot.url.split('#')[0] + f'#{timestamp}'
|
||||
add(new_url, tag=snapshot.tags_str())
|
||||
resnapshot_snapshot.short_description = "Re-Snapshot"
|
||||
|
||||
def overwrite_snapshots(self, request, queryset):
|
||||
archive_links([
|
||||
snapshot.as_link()
|
||||
for snapshot in queryset
|
||||
], overwrite=True, out_dir=OUTPUT_DIR)
|
||||
overwrite_snapshots.short_description = "Reset"
|
||||
|
||||
def delete_snapshots(self, request, queryset):
|
||||
remove(snapshots=queryset, yes=True, delete=True, out_dir=OUTPUT_DIR)
|
||||
|
||||
delete_snapshots.short_description = "Delete"
|
||||
|
||||
def add_tags(self, request, queryset):
|
||||
tags = request.POST.getlist('tags')
|
||||
print('[+] Adding tags', tags, 'to Snapshots', queryset)
|
||||
for obj in queryset:
|
||||
obj.tags.add(*tags)
|
||||
|
||||
add_tags.short_description = "+"
|
||||
|
||||
def remove_tags(self, request, queryset):
|
||||
tags = request.POST.getlist('tags')
|
||||
print('[-] Removing tags', tags, 'from Snapshots', queryset)
|
||||
for obj in queryset:
|
||||
obj.tags.remove(*tags)
|
||||
|
||||
remove_tags.short_description = "–"
|
||||
|
||||
|
||||
|
||||
title_str.short_description = 'Title'
|
||||
url_str.short_description = 'Original URL'
|
||||
|
||||
title_str.admin_order_field = 'title'
|
||||
url_str.admin_order_field = 'url'
|
||||
|
||||
|
||||
|
||||
class TagAdmin(admin.ModelAdmin):
|
||||
list_display = ('slug', 'name', 'num_snapshots', 'snapshots', 'id')
|
||||
sort_fields = ('id', 'name', 'slug')
|
||||
readonly_fields = ('id', 'num_snapshots', 'snapshots')
|
||||
search_fields = ('id', 'name', 'slug')
|
||||
fields = (*readonly_fields, 'name', 'slug')
|
||||
actions = ['delete_selected']
|
||||
ordering = ['-id']
|
||||
|
||||
def num_snapshots(self, obj):
|
||||
return format_html(
|
||||
'<a href="/admin/core/snapshot/?tags__id__exact={}">{} total</a>',
|
||||
obj.id,
|
||||
obj.snapshot_set.count(),
|
||||
)
|
||||
|
||||
def snapshots(self, obj):
|
||||
total_count = obj.snapshot_set.count()
|
||||
return mark_safe('<br/>'.join(
|
||||
format_html(
|
||||
'{} <code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a> {}</code>',
|
||||
snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...',
|
||||
snap.id,
|
||||
snap.timestamp,
|
||||
snap.url,
|
||||
)
|
||||
for snap in obj.snapshot_set.order_by('-updated')[:10]
|
||||
) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={obj.id}">and {total_count-10} more...</a>' if obj.snapshot_set.count() > 10 else ''))
|
||||
|
||||
|
||||
class ArchiveResultAdmin(admin.ModelAdmin):
|
||||
list_display = ('id', 'start_ts', 'extractor', 'snapshot_str', 'tags_str', 'cmd_str', 'status', 'output_str')
|
||||
sort_fields = ('start_ts', 'extractor', 'status')
|
||||
readonly_fields = ('id', 'uuid', 'snapshot_str', 'tags_str')
|
||||
search_fields = ('id', 'uuid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
|
||||
fields = (*readonly_fields, 'snapshot', 'extractor', 'status', 'start_ts', 'end_ts', 'output', 'pwd', 'cmd', 'cmd_version')
|
||||
autocomplete_fields = ['snapshot']
|
||||
|
||||
list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
|
||||
ordering = ['-start_ts']
|
||||
list_per_page = SNAPSHOTS_PER_PAGE
|
||||
|
||||
def snapshot_str(self, obj):
|
||||
return format_html(
|
||||
'<a href="/archive/{}/index.html"><b><code>[{}]</code></b></a><br/>'
|
||||
'<small>{}</small>',
|
||||
obj.snapshot.timestamp,
|
||||
obj.snapshot.timestamp,
|
||||
obj.snapshot.url[:128],
|
||||
)
|
||||
|
||||
def tags_str(self, obj):
|
||||
return obj.snapshot.tags_str()
|
||||
|
||||
def cmd_str(self, obj):
|
||||
return format_html(
|
||||
'<pre>{}</pre>',
|
||||
' '.join(obj.cmd) if isinstance(obj.cmd, list) else str(obj.cmd),
|
||||
)
|
||||
|
||||
def output_str(self, obj):
|
||||
return format_html(
|
||||
'<a href="/archive/{}/{}" class="output-link">↗️</a><pre>{}</pre>',
|
||||
obj.snapshot.timestamp,
|
||||
obj.output if (obj.status == 'succeeded') and obj.extractor not in ('title', 'archive_org') else 'index.html',
|
||||
obj.output,
|
||||
)
|
||||
|
||||
tags_str.short_description = 'tags'
|
||||
snapshot_str.short_description = 'snapshot'
|
||||
|
||||
class ArchiveBoxAdmin(admin.AdminSite):
|
||||
site_header = 'ArchiveBox'
|
||||
index_title = 'Links'
|
||||
site_title = 'Index'
|
||||
namespace = 'admin'
|
||||
|
||||
def get_urls(self):
|
||||
return [
|
||||
|
@@ -104,424 +421,9 @@ class ArchiveBoxAdmin(admin.AdminSite):
|
||||
return render(template_name='add.html', request=request, context=context)
|
||||
|
||||
|
||||
archivebox_admin = ArchiveBoxAdmin()
|
||||
archivebox_admin.register(get_user_model())
|
||||
archivebox_admin.register(APIToken)
|
||||
archivebox_admin.register(get_webhook_model(), WebhookAdmin)
|
||||
archivebox_admin.disable_action('delete_selected')
|
||||
|
||||
# archivebox_admin.register(CustomPlugin)
|
||||
|
||||
# patch admin with methods to add data views (implemented by admin_data_views package)
|
||||
############### Additional sections are defined in settings.ADMIN_DATA_VIEWS #########
|
||||
from admin_data_views.admin import get_app_list, admin_data_index_view, get_admin_data_urls, get_urls
|
||||
|
||||
archivebox_admin.get_app_list = get_app_list.__get__(archivebox_admin, ArchiveBoxAdmin)
|
||||
archivebox_admin.admin_data_index_view = admin_data_index_view.__get__(archivebox_admin, ArchiveBoxAdmin)
|
||||
archivebox_admin.get_admin_data_urls = get_admin_data_urls.__get__(archivebox_admin, ArchiveBoxAdmin)
|
||||
archivebox_admin.get_urls = get_urls(archivebox_admin.get_urls).__get__(archivebox_admin, ArchiveBoxAdmin)
|
||||
|
||||
|
||||
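The four assignments above graft view functions from admin_data_views onto the archivebox_admin instance. A tiny illustrative example of the descriptor trick they rely on (func.__get__(instance, cls) returns a bound method); the Site/describe names are made up:

# Sketch: binding a plain function to a single instance via the descriptor protocol.
class Site:
    pass

def describe(self):
    return f'site with {len(vars(self))} attrs'

site = Site()
site.describe = describe.__get__(site, Site)   # bound method, callable as site.describe()
assert site.describe() == 'site with 1 attrs'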
class ArchiveResultInline(admin.TabularInline):
|
||||
model = ArchiveResult
|
||||
|
||||
class TagInline(admin.TabularInline):
|
||||
model = Snapshot.tags.through
|
||||
|
||||
from django.contrib.admin.helpers import ActionForm
|
||||
from django.contrib.admin.widgets import AutocompleteSelectMultiple
|
||||
|
||||
class AutocompleteTags:
|
||||
model = Tag
|
||||
search_fields = ['name']
|
||||
name = 'tags'
|
||||
remote_field = TagInline
|
||||
|
||||
class AutocompleteTagsAdminStub:
|
||||
name = 'admin'
|
||||
|
||||
|
||||
class SnapshotActionForm(ActionForm):
|
||||
tags = forms.ModelMultipleChoiceField(
|
||||
queryset=Tag.objects.all(),
|
||||
required=False,
|
||||
widget=AutocompleteSelectMultiple(
|
||||
AutocompleteTags(),
|
||||
AutocompleteTagsAdminStub(),
|
||||
),
|
||||
)
|
||||
|
||||
# TODO: allow selecting actions for specific extractors? is this useful?
|
||||
# EXTRACTOR_CHOICES = [
|
||||
# (name, name.title())
|
||||
# for name, _, _ in get_default_archive_methods()
|
||||
# ]
|
||||
# extractor = forms.ChoiceField(
|
||||
# choices=EXTRACTOR_CHOICES,
|
||||
# required=False,
|
||||
# widget=forms.MultipleChoiceField(attrs={'class': "form-control"})
|
||||
# )
|
||||
|
||||
|
||||
def get_abid_info(self, obj):
|
||||
return format_html(
|
||||
# URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
|
||||
'''
|
||||
ABID: <code style="font-size: 16px; user-select: all"><b>{}</b></code><br/>
|
||||
TS: <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({})<br/>
|
||||
URI: <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({})<br/>
|
||||
SUBTYPE: <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({})<br/>
|
||||
RAND: <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({})<br/><br/>
|
||||
ABID AS UUID: <code style="font-size: 10px; user-select: all">{}</code> <br/><br/>
|
||||
|
||||
.uuid: <code style="font-size: 10px; user-select: all">{}</code> <br/>
|
||||
.id: <code style="font-size: 10px; user-select: all">{}</code> <br/>
|
||||
.pk: <code style="font-size: 10px; user-select: all">{}</code> <br/><br/>
|
||||
''',
|
||||
obj.abid,
|
||||
obj.ABID.ts, obj.abid_values['ts'].isoformat() if isinstance(obj.abid_values['ts'], datetime) else obj.abid_values['ts'],
|
||||
obj.ABID.uri, str(obj.abid_values['uri']),
|
||||
obj.ABID.subtype, str(obj.abid_values['subtype']),
|
||||
obj.ABID.rand, str(obj.abid_values['rand'])[-7:],
|
||||
obj.ABID.uuid,
|
||||
obj.uuid,
|
||||
obj.id,
|
||||
obj.pk,
|
||||
)
|
||||
|
||||
|
||||
@admin.register(Snapshot, site=archivebox_admin)
|
||||
class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
|
||||
list_display = ('added', 'title_str', 'files', 'size', 'url_str')
|
||||
sort_fields = ('title_str', 'url_str', 'added', 'files')
|
||||
readonly_fields = ('admin_actions', 'status_info', 'bookmarked', 'added', 'updated', 'created', 'modified', 'identifiers')
|
||||
search_fields = ('id', 'url', 'abid', 'uuid', 'timestamp', 'title', 'tags__name')
|
||||
fields = ('url', 'timestamp', 'created_by', 'tags', 'title', *readonly_fields)
|
||||
list_filter = ('added', 'updated', 'tags', 'archiveresult__status', 'created_by')
|
||||
ordering = ['-added']
|
||||
actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
|
||||
autocomplete_fields = ['tags']
|
||||
inlines = [ArchiveResultInline]
|
||||
list_per_page = SNAPSHOTS_PER_PAGE
|
||||
|
||||
action_form = SnapshotActionForm
|
||||
|
||||
def changelist_view(self, request, extra_context=None):
|
||||
extra_context = extra_context or {}
|
||||
return super().changelist_view(request, extra_context | GLOBAL_CONTEXT)
|
||||
|
||||
def get_urls(self):
|
||||
urls = super().get_urls()
|
||||
custom_urls = [
|
||||
path('grid/', self.admin_site.admin_view(self.grid_view), name='grid')
|
||||
]
|
||||
return custom_urls + urls
|
||||
|
||||
def get_queryset(self, request):
|
||||
self.request = request
|
||||
return super().get_queryset(request).prefetch_related('tags').annotate(archiveresult_count=Count('archiveresult'))
|
||||
|
||||
def tag_list(self, obj):
|
||||
return ', '.join(obj.tags.values_list('name', flat=True))
|
||||
|
||||
# TODO: figure out a different way to do this, you can't nest forms so this doesn't work
|
||||
# def action(self, obj):
|
||||
# # csrfmiddlewaretoken: Wa8UcQ4fD3FJibzxqHN3IYrrjLo4VguWynmbzzcPYoebfVUnDovon7GEMYFRgsh0
|
||||
# # action: update_snapshots
|
||||
# # select_across: 0
|
||||
# # _selected_action: 76d29b26-2a88-439e-877c-a7cca1b72bb3
|
||||
# return format_html(
|
||||
# '''
|
||||
# <form action="/admin/core/snapshot/" method="post" onsubmit="e => e.stopPropagation()">
|
||||
# <input type="hidden" name="csrfmiddlewaretoken" value="{}">
|
||||
# <input type="hidden" name="_selected_action" value="{}">
|
||||
# <button name="update_snapshots">Check</button>
|
||||
# <button name="update_titles">Pull title + favicon</button>
|
||||
# <button name="update_snapshots">Update</button>
|
||||
# <button name="overwrite_snapshots">Re-Archive (overwrite)</button>
|
||||
# <button name="delete_snapshots">Permanently delete</button>
|
||||
# </form>
|
||||
# ''',
|
||||
# csrf.get_token(self.request),
|
||||
# obj.pk,
|
||||
# )
|
||||
|
||||
def admin_actions(self, obj):
|
||||
return format_html(
|
||||
# URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
|
||||
'''
|
||||
<a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/archive/{}">Summary page ➡️</a>
|
||||
<a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/archive/{}/index.html#all">Result files 📑</a>
|
||||
<a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/admin/core/snapshot/?id__exact={}">Admin actions ⚙️</a>
|
||||
''',
|
||||
obj.timestamp,
|
||||
obj.timestamp,
|
||||
obj.pk,
|
||||
)
|
||||
|
||||
def status_info(self, obj):
|
||||
return format_html(
|
||||
# URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
|
||||
'''
|
||||
Archived: {} ({} files {})
|
||||
Favicon: <img src="{}" style="height: 20px"/>
|
||||
Status code: {} <br/>
|
||||
Server: {}
|
||||
Content type: {}
|
||||
Extension: {}
|
||||
''',
|
||||
'✅' if obj.is_archived else '❌',
|
||||
obj.num_outputs,
|
||||
self.size(obj) or '0kb',
|
||||
f'/archive/{obj.timestamp}/favicon.ico',
|
||||
obj.status_code or '-',
|
||||
obj.headers and obj.headers.get('Server') or '-',
|
||||
obj.headers and obj.headers.get('Content-Type') or '-',
|
||||
obj.extension or '-',
|
||||
)
|
||||
|
||||
def identifiers(self, obj):
|
||||
return get_abid_info(self, obj)
|
||||
|
||||
@admin.display(
|
||||
description='Title',
|
||||
ordering='title',
|
||||
)
|
||||
def title_str(self, obj):
|
||||
canon = obj.as_link().canonical_outputs()
|
||||
tags = ''.join(
|
||||
format_html('<a href="/admin/core/snapshot/?tags__id__exact={}"><span class="tag">{}</span></a> ', tag.id, tag)
|
||||
for tag in obj.tags.all()
|
||||
if str(tag).strip()
|
||||
)
|
||||
return format_html(
|
||||
'<a href="/{}">'
|
||||
'<img src="/{}/{}" class="favicon" onerror="this.remove()">'
|
||||
'</a>'
|
||||
'<a href="/{}/index.html">'
|
||||
'<b class="status-{}">{}</b>'
|
||||
'</a>',
|
||||
obj.archive_path,
|
||||
obj.archive_path, canon['favicon_path'],
|
||||
obj.archive_path,
|
||||
'fetched' if obj.latest_title or obj.title else 'pending',
|
||||
urldecode(htmldecode(obj.latest_title or obj.title or ''))[:128] or 'Pending...'
|
||||
) + mark_safe(f' <span class="tags">{tags}</span>')
|
||||
|
||||
@admin.display(
|
||||
description='Files Saved',
|
||||
ordering='archiveresult_count',
|
||||
)
|
||||
def files(self, obj):
|
||||
return snapshot_icons(obj)
|
||||
|
||||
|
||||
@admin.display(
|
||||
ordering='archiveresult_count'
|
||||
)
|
||||
def size(self, obj):
|
||||
archive_size = (Path(obj.link_dir) / 'index.html').exists() and obj.archive_size
|
||||
if archive_size:
|
||||
size_txt = printable_filesize(archive_size)
|
||||
if archive_size > 52428800:
|
||||
size_txt = mark_safe(f'<b>{size_txt}</b>')
|
||||
else:
|
||||
size_txt = mark_safe('<span style="opacity: 0.3">...</span>')
|
||||
return format_html(
|
||||
'<a href="/{}" title="View all files">{}</a>',
|
||||
obj.archive_path,
|
||||
size_txt,
|
||||
)
|
||||
|
||||
|
||||
@admin.display(
|
||||
description='Original URL',
|
||||
ordering='url',
|
||||
)
|
||||
def url_str(self, obj):
|
||||
return format_html(
|
||||
'<a href="{}"><code style="user-select: all;">{}</code></a>',
|
||||
obj.url,
|
||||
obj.url[:128],
|
||||
)
|
||||
|
||||
def grid_view(self, request, extra_context=None):
|
||||
|
||||
# cl = self.get_changelist_instance(request)
|
||||
|
||||
# Save before monkey patching to restore for changelist list view
|
||||
saved_change_list_template = self.change_list_template
|
||||
saved_list_per_page = self.list_per_page
|
||||
saved_list_max_show_all = self.list_max_show_all
|
||||
|
||||
# Monkey patch here plus core_tags.py
|
||||
self.change_list_template = 'private_index_grid.html'
|
||||
self.list_per_page = SNAPSHOTS_PER_PAGE
|
||||
self.list_max_show_all = self.list_per_page
|
||||
|
||||
# Call monkey patched view
|
||||
rendered_response = self.changelist_view(request, extra_context=extra_context)
|
||||
|
||||
# Restore values
|
||||
self.change_list_template = saved_change_list_template
|
||||
self.list_per_page = saved_list_per_page
|
||||
self.list_max_show_all = saved_list_max_show_all
|
||||
|
||||
return rendered_response
|
||||
|
||||
# for debugging, uncomment this to print all requests:
|
||||
# def changelist_view(self, request, extra_context=None):
|
||||
# print('[*] Got request', request.method, request.POST)
|
||||
# return super().changelist_view(request, extra_context=None)
|
||||
|
||||
@admin.action(
|
||||
description="Pull"
|
||||
)
|
||||
def update_snapshots(self, request, queryset):
|
||||
archive_links([
|
||||
snapshot.as_link()
|
||||
for snapshot in queryset
|
||||
], out_dir=OUTPUT_DIR)
|
||||
|
||||
@admin.action(
|
||||
description="⬇️ Title"
|
||||
)
|
||||
def update_titles(self, request, queryset):
|
||||
archive_links([
|
||||
snapshot.as_link()
|
||||
for snapshot in queryset
|
||||
], overwrite=True, methods=('title','favicon'), out_dir=OUTPUT_DIR)
|
||||
|
||||
@admin.action(
|
||||
description="Re-Snapshot"
|
||||
)
|
||||
def resnapshot_snapshot(self, request, queryset):
|
||||
for snapshot in queryset:
|
||||
timestamp = datetime.now(timezone.utc).isoformat('T', 'seconds')
|
||||
new_url = snapshot.url.split('#')[0] + f'#{timestamp}'
|
||||
add(new_url, tag=snapshot.tags_str())
|
||||
|
||||
@admin.action(
|
||||
description="Reset"
|
||||
)
|
||||
def overwrite_snapshots(self, request, queryset):
|
||||
archive_links([
|
||||
snapshot.as_link()
|
||||
for snapshot in queryset
|
||||
], overwrite=True, out_dir=OUTPUT_DIR)
|
||||
|
||||
@admin.action(
|
||||
description="Delete"
|
||||
)
|
||||
def delete_snapshots(self, request, queryset):
|
||||
remove(snapshots=queryset, yes=True, delete=True, out_dir=OUTPUT_DIR)
|
||||
|
||||
|
||||
@admin.action(
|
||||
description="+"
|
||||
)
|
||||
def add_tags(self, request, queryset):
|
||||
tags = request.POST.getlist('tags')
|
||||
print('[+] Adding tags', tags, 'to Snapshots', queryset)
|
||||
for obj in queryset:
|
||||
obj.tags.add(*tags)
|
||||
|
||||
|
||||
@admin.action(
|
||||
description="–"
|
||||
)
|
||||
def remove_tags(self, request, queryset):
|
||||
tags = request.POST.getlist('tags')
|
||||
print('[-] Removing tags', tags, 'from Snapshots', queryset)
|
||||
for obj in queryset:
|
||||
obj.tags.remove(*tags)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@admin.register(Tag, site=archivebox_admin)
|
||||
class TagAdmin(admin.ModelAdmin):
|
||||
list_display = ('slug', 'name', 'num_snapshots', 'snapshots', 'abid')
|
||||
sort_fields = ('id', 'name', 'slug', 'abid')
|
||||
readonly_fields = ('created', 'modified', 'identifiers', 'num_snapshots', 'snapshots')
|
||||
search_fields = ('id', 'abid', 'uuid', 'name', 'slug')
|
||||
fields = ('name', 'slug', 'created_by', *readonly_fields, )
|
||||
actions = ['delete_selected']
|
||||
ordering = ['-id']
|
||||
|
||||
def identifiers(self, obj):
|
||||
return get_abid_info(self, obj)
|
||||
|
||||
def num_snapshots(self, tag):
|
||||
return format_html(
|
||||
'<a href="/admin/core/snapshot/?tags__id__exact={}">{} total</a>',
|
||||
tag.id,
|
||||
tag.snapshot_set.count(),
|
||||
)
|
||||
|
||||
def snapshots(self, tag):
|
||||
total_count = tag.snapshot_set.count()
|
||||
return mark_safe('<br/>'.join(
|
||||
format_html(
|
||||
'{} <code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a> {}</code>',
|
||||
snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...',
|
||||
snap.pk,
|
||||
snap.abid,
|
||||
snap.url,
|
||||
)
|
||||
for snap in tag.snapshot_set.order_by('-updated')[:10]
|
||||
) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={tag.id}">and {total_count-10} more...</a>' if tag.snapshot_set.count() > 10 else ''))
|
||||
|
||||
|
||||
@admin.register(ArchiveResult, site=archivebox_admin)
|
||||
class ArchiveResultAdmin(admin.ModelAdmin):
|
||||
list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str')
|
||||
sort_fields = ('start_ts', 'extractor', 'status')
|
||||
readonly_fields = ('snapshot_info', 'tags_str', 'created_by', 'created', 'modified', 'identifiers')
|
||||
search_fields = ('id', 'uuid', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
|
||||
fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'cmd', 'start_ts', 'end_ts', 'cmd_version', *readonly_fields)
|
||||
autocomplete_fields = ['snapshot']
|
||||
|
||||
list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
|
||||
ordering = ['-start_ts']
|
||||
list_per_page = SNAPSHOTS_PER_PAGE
|
||||
|
||||
@admin.display(
|
||||
description='Snapshot Info'
|
||||
)
|
||||
def snapshot_info(self, result):
|
||||
return format_html(
|
||||
'<a href="/archive/{}/index.html"><b><code>[{}]</code></b> {} {}</a><br/>',
|
||||
result.snapshot.timestamp,
|
||||
result.snapshot.abid,
|
||||
result.snapshot.added.strftime('%Y-%m-%d %H:%M'),
|
||||
result.snapshot.url[:128],
|
||||
)
|
||||
|
||||
def identifiers(self, obj):
|
||||
return get_abid_info(self, obj)
|
||||
|
||||
@admin.display(
|
||||
description='Snapshot Tags'
|
||||
)
|
||||
def tags_str(self, result):
|
||||
return result.snapshot.tags_str()
|
||||
|
||||
def cmd_str(self, result):
|
||||
return format_html(
|
||||
'<pre>{}</pre>',
|
||||
' '.join(result.cmd) if isinstance(result.cmd, list) else str(result.cmd),
|
||||
)
|
||||
|
||||
def output_str(self, result):
|
||||
return format_html(
|
||||
'<a href="/archive/{}/{}" class="output-link">↗️</a><pre>{}</pre>',
|
||||
result.snapshot.timestamp,
|
||||
result.output if (result.status == 'succeeded') and result.extractor not in ('title', 'archive_org') else 'index.html',
|
||||
result.output,
|
||||
)
|
||||
admin.site = ArchiveBoxAdmin()
|
||||
admin.site.register(get_user_model())
|
||||
admin.site.register(Snapshot, SnapshotAdmin)
|
||||
admin.site.register(Tag, TagAdmin)
|
||||
admin.site.register(ArchiveResult, ArchiveResultAdmin)
|
||||
admin.site.disable_action('delete_selected')
|
||||
|
|
|
@@ -1,28 +1,7 @@
__package__ = 'archivebox.core'

from django.apps import AppConfig


class CoreConfig(AppConfig):
    name = 'core'

    def ready(self):
        # register our custom admin as the primary django admin
        from django.contrib import admin
        from django.contrib.admin import sites
        from core.admin import archivebox_admin

        admin.site = archivebox_admin
        sites.site = archivebox_admin

        # register signal handlers
        from .auth import register_signals

        register_signals()


# from django.contrib.admin.apps import AdminConfig
# class CoreAdminConfig(AdminConfig):
#     default_site = "core.admin.get_admin_site"

    # WIP: broken by Django 3.1.2 -> 4.0 migration
    default_auto_field = 'django.db.models.UUIDField'
@@ -1,14 +0,0 @@
__package__ = 'archivebox.core'


from ..config import (
    LDAP
)

def register_signals():

    if LDAP:
        import django_auth_ldap.backend
        from .auth_ldap import create_user

        django_auth_ldap.backend.populate_user.connect(create_user)
@@ -1,10 +0,0 @@
from ..config import (
    LDAP_CREATE_SUPERUSER
)

def create_user(sender, user=None, ldap_user=None, **kwargs):
    if not user.id and LDAP_CREATE_SUPERUSER:
        user.is_superuser = True

    user.is_staff = True
    print(f'[!] WARNING: Creating new user {user} based on LDAP user {ldap_user} (is_staff={user.is_staff}, is_superuser={user.is_superuser})')
@@ -17,6 +17,8 @@ except AttributeError:


def forwards_func(apps, schema_editor):
    from core.models import EXTRACTORS

    Snapshot = apps.get_model("core", "Snapshot")
    ArchiveResult = apps.get_model("core", "ArchiveResult")
@@ -1,43 +0,0 @@
# Generated by Django 5.0.6 on 2024-05-13 10:56
|
||||
|
||||
import charidfield.fields
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0022_auto_20231023_2008'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterModelOptions(
|
||||
name='archiveresult',
|
||||
options={'verbose_name': 'Result'},
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='abid',
|
||||
field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='res_', unique=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='abid',
|
||||
field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='snp_', unique=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='uuid',
|
||||
field=models.UUIDField(blank=True, null=True, unique=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='tag',
|
||||
name='abid',
|
||||
field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='tag_', unique=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='extractor',
|
||||
field=models.CharField(choices=[('htmltotext', 'htmltotext'), ('git', 'git'), ('singlefile', 'singlefile'), ('media', 'media'), ('archive_org', 'archive_org'), ('readability', 'readability'), ('mercury', 'mercury'), ('favicon', 'favicon'), ('pdf', 'pdf'), ('headers', 'headers'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('title', 'title'), ('wget', 'wget')], max_length=32),
|
||||
),
|
||||
]
|
|
@@ -1,98 +0,0 @@
# Generated by Django 5.0.6 on 2024-05-13 11:43
|
||||
|
||||
from django.db import migrations
|
||||
from datetime import datetime
|
||||
from abid_utils.abid import abid_from_values
|
||||
|
||||
|
||||
def calculate_abid(self):
|
||||
"""
|
||||
Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
|
||||
"""
|
||||
prefix = self.abid_prefix
|
||||
ts = eval(self.abid_ts_src)
|
||||
uri = eval(self.abid_uri_src)
|
||||
subtype = eval(self.abid_subtype_src)
|
||||
rand = eval(self.abid_rand_src)
|
||||
|
||||
if (not prefix) or prefix == 'obj_':
|
||||
suggested_abid = self.__class__.__name__[:3].lower()
|
||||
raise Exception(f'{self.__class__.__name__}.abid_prefix must be defined to calculate ABIDs (suggested: {suggested_abid})')
|
||||
|
||||
if not ts:
|
||||
ts = datetime.utcfromtimestamp(0)
|
||||
print(f'[!] WARNING: Generating ABID with ts=0000000000 placeholder because {self.__class__.__name__}.abid_ts_src={self.abid_ts_src} is unset!', ts.isoformat())
|
||||
|
||||
if not uri:
|
||||
uri = str(self)
|
||||
print(f'[!] WARNING: Generating ABID with uri=str(self) placeholder because {self.__class__.__name__}.abid_uri_src={self.abid_uri_src} is unset!', uri)
|
||||
|
||||
if not subtype:
|
||||
subtype = self.__class__.__name__
|
||||
print(f'[!] WARNING: Generating ABID with subtype={subtype} placeholder because {self.__class__.__name__}.abid_subtype_src={self.abid_subtype_src} is unset!', subtype)
|
||||
|
||||
if not rand:
|
||||
rand = getattr(self, 'uuid', None) or getattr(self, 'id', None) or getattr(self, 'pk')
|
||||
print(f'[!] WARNING: Generating ABID with rand=self.id placeholder because {self.__class__.__name__}.abid_rand_src={self.abid_rand_src} is unset!', rand)
|
||||
|
||||
abid = abid_from_values(
|
||||
prefix=prefix,
|
||||
ts=ts,
|
||||
uri=uri,
|
||||
subtype=subtype,
|
||||
rand=rand,
|
||||
)
|
||||
assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for {self.__class__.__name__}'
|
||||
return abid
|
||||
|
||||
|
||||
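A hedged usage sketch for calculate_abid() above: the abid_*_src strings are eval()'d against the instance, so any object carrying those attributes works. The field values below are invented for illustration; the prefix and *_src strings match the ones assigned in generate_snapshot_abids() further down.

# Illustration only -- a stand-in object with the attributes calculate_abid() reads.
from types import SimpleNamespace
from datetime import datetime, timezone
import uuid

stub = SimpleNamespace(
    abid_prefix='snp_',
    abid_ts_src='self.added',
    abid_uri_src='self.url',
    abid_subtype_src='"01"',
    abid_rand_src='self.uuid',
    added=datetime(2024, 5, 13, tzinfo=timezone.utc),   # example values, not real data
    url='https://example.com',
    uuid=uuid.uuid4(),
)
abid = calculate_abid(stub)   # -> ABID object (string form like 'snp_...', cf. the help_text examples above)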
def copy_snapshot_uuids(apps, schema_editor):
|
||||
print(' Copying snapshot.id -> snapshot.uuid...')
|
||||
Snapshot = apps.get_model("core", "Snapshot")
|
||||
for snapshot in Snapshot.objects.all():
|
||||
snapshot.uuid = snapshot.id
|
||||
snapshot.save(update_fields=["uuid"])
|
||||
|
||||
def generate_snapshot_abids(apps, schema_editor):
|
||||
print(' Generating snapshot.abid values...')
|
||||
Snapshot = apps.get_model("core", "Snapshot")
|
||||
for snapshot in Snapshot.objects.all():
|
||||
snapshot.abid_prefix = 'snp_'
|
||||
snapshot.abid_ts_src = 'self.added'
|
||||
snapshot.abid_uri_src = 'self.url'
|
||||
snapshot.abid_subtype_src = '"01"'
|
||||
snapshot.abid_rand_src = 'self.uuid'
|
||||
|
||||
snapshot.abid = calculate_abid(snapshot)
|
||||
snapshot.save(update_fields=["abid"])
|
||||
|
||||
def generate_archiveresult_abids(apps, schema_editor):
|
||||
print(' Generating ArchiveResult.abid values... (may take an hour or longer for large collections...)')
|
||||
ArchiveResult = apps.get_model("core", "ArchiveResult")
|
||||
Snapshot = apps.get_model("core", "Snapshot")
|
||||
for result in ArchiveResult.objects.all():
|
||||
result.abid_prefix = 'res_'
|
||||
result.snapshot = Snapshot.objects.get(pk=result.snapshot_id)
|
||||
result.snapshot_added = result.snapshot.added
|
||||
result.snapshot_url = result.snapshot.url
|
||||
result.abid_ts_src = 'self.snapshot_added'
|
||||
result.abid_uri_src = 'self.snapshot_url'
|
||||
result.abid_subtype_src = 'self.extractor'
|
||||
result.abid_rand_src = 'self.id'
|
||||
|
||||
result.abid = calculate_abid(result)
|
||||
result.uuid = result.abid.uuid
|
||||
result.save(update_fields=["abid", "uuid"])
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0023_alter_archiveresult_options_archiveresult_abid_and_more'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RunPython(copy_snapshot_uuids, reverse_code=migrations.RunPython.noop),
|
||||
migrations.RunPython(generate_snapshot_abids, reverse_code=migrations.RunPython.noop),
|
||||
migrations.RunPython(generate_archiveresult_abids, reverse_code=migrations.RunPython.noop),
|
||||
]
|
|
@@ -1,19 +0,0 @@
# Generated by Django 5.0.6 on 2024-05-13 12:08
|
||||
|
||||
import uuid
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0024_auto_20240513_1143'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='uuid',
|
||||
field=models.UUIDField(default=uuid.uuid4, editable=False, unique=True),
|
||||
),
|
||||
]
|
|
@@ -1,76 +0,0 @@
# Generated by Django 5.0.6 on 2024-05-13 13:01
|
||||
|
||||
import abid_utils.models
|
||||
import django.db.models.deletion
|
||||
import django.utils.timezone
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0025_alter_archiveresult_uuid'),
|
||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='created',
|
||||
field=models.DateTimeField(auto_now_add=True, default=django.utils.timezone.now),
|
||||
preserve_default=False,
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='created_by',
|
||||
field=models.ForeignKey(default=abid_utils.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='modified',
|
||||
field=models.DateTimeField(auto_now=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='created',
|
||||
field=models.DateTimeField(auto_now_add=True, default=django.utils.timezone.now),
|
||||
preserve_default=False,
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='created_by',
|
||||
field=models.ForeignKey(default=abid_utils.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='modified',
|
||||
field=models.DateTimeField(auto_now=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='tag',
|
||||
name='created',
|
||||
field=models.DateTimeField(auto_now_add=True, default=django.utils.timezone.now),
|
||||
preserve_default=False,
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='tag',
|
||||
name='created_by',
|
||||
field=models.ForeignKey(default=abid_utils.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='tag',
|
||||
name='modified',
|
||||
field=models.DateTimeField(auto_now=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='tag',
|
||||
name='uuid',
|
||||
field=models.UUIDField(blank=True, null=True, unique=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='uuid',
|
||||
field=models.UUIDField(blank=True, null=True, unique=True),
|
||||
),
|
||||
]
|
|
@@ -10,7 +10,7 @@ class SearchResultsAdminMixin:

        search_term = search_term.strip()
        if not search_term:
            return qs.distinct(), use_distinct
            return qs, use_distinct
        try:
            qsearch = query_search_index(search_term)
            qs = qs | qsearch

@@ -18,4 +18,4 @@ class SearchResultsAdminMixin:
            print(f'[!] Error while using search backend: {err.__class__.__name__} {err}')
            messages.add_message(request, messages.WARNING, f'Error from the search backend, only showing results from default admin search fields - Error: {err}')

        return qs.distinct(), use_distinct
        return qs, use_distinct
@@ -1,14 +1,11 @@
__package__ = 'archivebox.core'


from typing import Optional, List, Dict
from django_stubs_ext.db.models import TypedModelMeta

import uuid
import json

import uuid
from uuid import uuid4
from pathlib import Path
from typing import Optional, List

from django.db import models
from django.utils.functional import cached_property
@@ -18,58 +15,40 @@ from django.urls import reverse
from django.db.models import Case, When, Value, IntegerField
|
||||
from django.contrib.auth.models import User # noqa
|
||||
|
||||
from abid_utils.models import ABIDModel, ABIDField
|
||||
|
||||
from ..config import ARCHIVE_DIR, ARCHIVE_DIR_NAME
|
||||
from ..system import get_dir_size
|
||||
from ..util import parse_date, base_url
|
||||
from ..util import parse_date, base_url, hashurl
|
||||
from ..index.schema import Link
|
||||
from ..index.html import snapshot_icons
|
||||
from ..extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE, EXTRACTORS
|
||||
from ..extractors import get_default_archive_methods, ARCHIVE_METHODS_INDEXING_PRECEDENCE
|
||||
|
||||
|
||||
EXTRACTOR_CHOICES = [(extractor_name, extractor_name) for extractor_name in EXTRACTORS.keys()]
|
||||
EXTRACTORS = [(extractor[0], extractor[0]) for extractor in get_default_archive_methods()]
|
||||
STATUS_CHOICES = [
|
||||
("succeeded", "succeeded"),
|
||||
("failed", "failed"),
|
||||
("skipped", "skipped")
|
||||
]
|
||||
|
||||
try:
|
||||
JSONField = models.JSONField
|
||||
except AttributeError:
|
||||
import jsonfield
|
||||
JSONField = jsonfield.JSONField
|
||||
|
||||
|
||||
# class BaseModel(models.Model):
|
||||
# # TODO: migrate all models to a shared base class with all our standard fields and helpers:
|
||||
# # ulid/created/modified/owner/is_deleted/as_json/from_json/etc.
|
||||
# #
|
||||
# # id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
|
||||
# # ulid = models.CharField(max_length=26, null=True, blank=True, db_index=True, unique=True)
|
||||
|
||||
# class Meta(TypedModelMeta):
|
||||
# abstract = True
|
||||
|
||||
|
||||
class Tag(ABIDModel):
|
||||
class Tag(models.Model):
|
||||
"""
|
||||
Based on django-taggit model + ABID base.
|
||||
Based on django-taggit model
|
||||
"""
|
||||
abid_prefix = 'tag_'
|
||||
abid_ts_src = 'self.created' # TODO: add created/modified time
|
||||
abid_uri_src = 'self.name'
|
||||
abid_subtype_src = '"03"'
|
||||
abid_rand_src = 'self.id'
|
||||
|
||||
# id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True)
|
||||
id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
|
||||
uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
|
||||
abid = ABIDField(prefix=abid_prefix)
|
||||
|
||||
|
||||
name = models.CharField(unique=True, blank=False, max_length=100)
|
||||
slug = models.SlugField(unique=True, blank=True, max_length=100)
|
||||
|
||||
# slug is autoset on save from name, never set it manually
|
||||
slug = models.SlugField(unique=True, blank=True, max_length=100)
|
||||
|
||||
|
||||
class Meta(TypedModelMeta):
|
||||
class Meta:
|
||||
verbose_name = "Tag"
|
||||
verbose_name_plural = "Tags"
|
||||
|
||||
|
@@ -105,16 +84,8 @@ class Tag(ABIDModel):
return super().save(*args, **kwargs)
|
||||
|
||||
|
||||
class Snapshot(ABIDModel):
|
||||
abid_prefix = 'snp_'
|
||||
abid_ts_src = 'self.added'
|
||||
abid_uri_src = 'self.url'
|
||||
abid_subtype_src = '"01"'
|
||||
abid_rand_src = 'self.id'
|
||||
|
||||
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) # legacy pk
|
||||
uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
|
||||
abid = ABIDField(prefix=abid_prefix)
|
||||
class Snapshot(models.Model):
|
||||
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
|
||||
|
||||
url = models.URLField(unique=True, db_index=True)
|
||||
timestamp = models.CharField(max_length=32, unique=True, db_index=True)
|
||||
|
@ -127,7 +98,6 @@ class Snapshot(ABIDModel):
|
|||
|
||||
keys = ('url', 'timestamp', 'title', 'tags', 'updated')
|
||||
|
||||
|
||||
def __repr__(self) -> str:
|
||||
title = self.title or '-'
|
||||
return f'[{self.timestamp}] {self.url[:64]} ({title[:64]})'
|
||||
|
@@ -156,8 +126,8 @@ class Snapshot(ABIDModel):
        from ..index import load_link_details
        return load_link_details(self.as_link())

    def tags_str(self, nocache=True) -> str | None:
        cache_key = f'{self.pk}-{(self.updated or self.added).timestamp()}-tags'
    def tags_str(self, nocache=True) -> str:
        cache_key = f'{self.id}-{(self.updated or self.added).timestamp()}-tags'
        calc_tags_str = lambda: ','.join(self.tags.order_by('name').values_list('name', flat=True))
        if nocache:
            tags_str = calc_tags_str()
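The timestamped cache_key above is built so that any update to the snapshot naturally retires old entries. A hedged sketch of how such a key pairs with Django's low-level cache API; cache.get_or_set is standard Django, its use here is illustrative rather than taken from this diff.

from django.core.cache import cache

def cached_tags_str(snapshot):
    # key changes whenever the snapshot is updated, so stale values simply expire unused
    cache_key = f'{snapshot.pk}-{(snapshot.updated or snapshot.added).timestamp()}-tags'
    calc = lambda: ','.join(snapshot.tags.order_by('name').values_list('name', flat=True))
    return cache.get_or_set(cache_key, calc)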
@@ -187,9 +157,13 @@ class Snapshot(ABIDModel):
        return self.as_link().is_archived

    @cached_property
    def num_outputs(self) -> int:
    def num_outputs(self):
        return self.archiveresult_set.filter(status='succeeded').count()

    @cached_property
    def url_hash(self):
        return hashurl(self.url)

    @cached_property
    def base_url(self):
        return base_url(self.url)
@@ -204,7 +178,7 @@ class Snapshot(ABIDModel):

    @cached_property
    def archive_size(self):
        cache_key = f'{str(self.pk)[:12]}-{(self.updated or self.added).timestamp()}-size'
        cache_key = f'{str(self.id)[:12]}-{(self.updated or self.added).timestamp()}-size'

        def calc_dir_size():
            try:
@@ -225,7 +199,7 @@ class Snapshot(ABIDModel):
        return None

    @cached_property
    def headers(self) -> Optional[Dict[str, str]]:
    def headers(self) -> Optional[dict]:
        try:
            return json.loads((Path(self.link_dir) / 'headers.json').read_text(encoding='utf-8').strip())
        except Exception:
@@ -276,37 +250,11 @@ class Snapshot(ABIDModel):
tags_id = []
|
||||
for tag in tags:
|
||||
if tag.strip():
|
||||
tags_id.append(Tag.objects.get_or_create(name=tag)[0].pk)
|
||||
tags_id.append(Tag.objects.get_or_create(name=tag)[0].id)
|
||||
self.tags.clear()
|
||||
self.tags.add(*tags_id)
|
||||
|
||||
|
||||
# def get_storage_dir(self, create=True, symlink=True) -> Path:
|
||||
# date_str = self.added.strftime('%Y%m%d')
|
||||
# domain_str = domain(self.url)
|
||||
# abs_storage_dir = Path(ARCHIVE_DIR) / 'snapshots' / date_str / domain_str / str(self.ulid)
|
||||
|
||||
# if create and not abs_storage_dir.is_dir():
|
||||
# abs_storage_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# if symlink:
|
||||
# LINK_PATHS = [
|
||||
# Path(ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
|
||||
# # Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_id' / str(self.ulid),
|
||||
# Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_date' / date_str / domain_str / str(self.ulid),
|
||||
# Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_domain' / domain_str / date_str / str(self.ulid),
|
||||
# ]
|
||||
# for link_path in LINK_PATHS:
|
||||
# link_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
# try:
|
||||
# link_path.symlink_to(abs_storage_dir)
|
||||
# except FileExistsError:
|
||||
# link_path.unlink()
|
||||
# link_path.symlink_to(abs_storage_dir)
|
||||
|
||||
# return abs_storage_dir
|
||||
|
||||
|
||||
class ArchiveResultManager(models.Manager):
|
||||
def indexable(self, sorted: bool = True):
|
||||
INDEXABLE_METHODS = [ r[0] for r in ARCHIVE_METHODS_INDEXING_PRECEDENCE ]
|
||||
|
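A hedged sketch of how the indexable() manager method that starts above can be completed using the Case/When/Value/IntegerField imports from this file; it assumes ARCHIVE_METHODS_INDEXING_PRECEDENCE is a sequence of (extractor_name, precedence) pairs and that only succeeded results are indexed, neither of which this diff confirms.

class ArchiveResultManagerSketch(models.Manager):
    def indexable(self, sorted: bool = True):
        INDEXABLE_METHODS = [r[0] for r in ARCHIVE_METHODS_INDEXING_PRECEDENCE]
        qs = self.get_queryset().filter(extractor__in=INDEXABLE_METHODS, status='succeeded')
        if sorted:
            precedence = [
                When(extractor=method, then=Value(rank))
                for method, rank in ARCHIVE_METHODS_INDEXING_PRECEDENCE
            ]
            qs = qs.annotate(
                indexing_precedence=Case(*precedence, default=Value(1000), output_field=IntegerField())
            ).order_by('indexing_precedence')
        return qs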
@@ -318,22 +266,13 @@ class ArchiveResultManager(models.Manager):
return qs
|
||||
|
||||
|
||||
class ArchiveResult(ABIDModel):
|
||||
abid_prefix = 'res_'
|
||||
abid_ts_src = 'self.snapshot.added'
|
||||
abid_uri_src = 'self.snapshot.url'
|
||||
abid_subtype_src = 'self.extractor'
|
||||
abid_rand_src = 'self.uuid'
|
||||
EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
|
||||
|
||||
# id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
|
||||
id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID') # legacy pk
|
||||
uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
|
||||
abid = ABIDField(prefix=abid_prefix)
|
||||
class ArchiveResult(models.Model):
|
||||
id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
|
||||
uuid = models.UUIDField(default=uuid.uuid4, editable=False)
|
||||
|
||||
snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
|
||||
extractor = models.CharField(choices=EXTRACTOR_CHOICES, max_length=32)
|
||||
cmd = models.JSONField()
|
||||
extractor = models.CharField(choices=EXTRACTORS, max_length=32)
|
||||
cmd = JSONField()
|
||||
pwd = models.CharField(max_length=256)
|
||||
cmd_version = models.CharField(max_length=128, default=None, null=True, blank=True)
|
||||
output = models.CharField(max_length=1024)
|
||||
|
@ -343,69 +282,5 @@ class ArchiveResult(ABIDModel):
|
|||
|
||||
objects = ArchiveResultManager()
|
||||
|
||||
class Meta(TypedModelMeta):
|
||||
verbose_name = 'Result'
|
||||
|
||||
def __str__(self):
|
||||
return self.extractor
|
||||
|
||||
@cached_property
|
||||
def snapshot_dir(self):
|
||||
return Path(self.snapshot.link_dir)
|
||||
|
||||
|
||||
@property
|
||||
def extractor_module(self):
|
||||
return EXTRACTORS[self.extractor]
|
||||
|
||||
def output_path(self) -> str:
|
||||
"""return the canonical output filename or directory name within the snapshot dir"""
|
||||
return self.extractor_module.get_output_path()
|
||||
|
||||
def embed_path(self) -> str:
|
||||
"""
|
||||
return the actual runtime-calculated path to the file on-disk that
|
||||
should be used for user-facing iframe embeds of this result
|
||||
"""
|
||||
|
||||
if hasattr(self.extractor_module, 'get_embed_path'):
|
||||
return self.extractor_module.get_embed_path(self)
|
||||
|
||||
return self.extractor_module.get_output_path()
|
||||
|
||||
def legacy_output_path(self):
|
||||
link = self.snapshot.as_link()
|
||||
return link.canonical_outputs().get(f'{self.extractor}_path')
|
||||
|
||||
def output_exists(self) -> bool:
|
||||
return Path(self.output_path()).exists()
|
||||
|
||||
|
||||
# def get_storage_dir(self, create=True, symlink=True):
|
||||
# date_str = self.snapshot.added.strftime('%Y%m%d')
|
||||
# domain_str = domain(self.snapshot.url)
|
||||
# abs_storage_dir = Path(ARCHIVE_DIR) / 'results' / date_str / domain_str / self.extractor / str(self.ulid)
|
||||
|
||||
# if create and not abs_storage_dir.is_dir():
|
||||
# abs_storage_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# if symlink:
|
||||
# LINK_PATHS = [
|
||||
# Path(ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
|
||||
# # Path(ARCHIVE_DIR).parent / 'index' / 'results_by_id' / str(self.ulid),
|
||||
# # Path(ARCHIVE_DIR).parent / 'index' / 'results_by_date' / date_str / domain_str / self.extractor / str(self.ulid),
|
||||
# Path(ARCHIVE_DIR).parent / 'index' / 'results_by_domain' / domain_str / date_str / self.extractor / str(self.ulid),
|
||||
# Path(ARCHIVE_DIR).parent / 'index' / 'results_by_type' / self.extractor / date_str / domain_str / str(self.ulid),
|
||||
# ]
|
||||
# for link_path in LINK_PATHS:
|
||||
# link_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
# try:
|
||||
# link_path.symlink_to(abs_storage_dir)
|
||||
# except FileExistsError:
|
||||
# link_path.unlink()
|
||||
# link_path.symlink_to(abs_storage_dir)
|
||||
|
||||
# return abs_storage_dir
|
||||
|
||||
# def symlink_index(self, create=True):
|
||||
# abs_result_dir = self.get_storage_dir(create=create)
|
||||
|
|
|
@@ -10,7 +10,6 @@ from pathlib import Path
from django.utils.crypto import get_random_string
|
||||
|
||||
from ..config import (
|
||||
CONFIG,
|
||||
DEBUG,
|
||||
SECRET_KEY,
|
||||
ALLOWED_HOSTS,
|
||||
|
@@ -19,9 +18,7 @@ from ..config import (
CUSTOM_TEMPLATES_DIR,
|
||||
SQL_INDEX_FILENAME,
|
||||
OUTPUT_DIR,
|
||||
ARCHIVE_DIR,
|
||||
LOGS_DIR,
|
||||
CACHE_DIR,
|
||||
TIMEZONE,
|
||||
|
||||
LDAP,
|
||||
|
@@ -55,26 +52,6 @@ APPEND_SLASH = True
|
||||
DEBUG = DEBUG or ('--debug' in sys.argv)
|
||||
|
||||
|
||||
# add plugins folders to system path, and load plugins in installed_apps
|
||||
BUILTIN_PLUGINS_DIR = PACKAGE_DIR / 'plugins'
|
||||
USER_PLUGINS_DIR = OUTPUT_DIR / 'plugins'
|
||||
sys.path.insert(0, str(BUILTIN_PLUGINS_DIR))
|
||||
sys.path.insert(0, str(USER_PLUGINS_DIR))
|
||||
|
||||
def find_plugins(plugins_dir):
|
||||
return {
|
||||
# plugin_entrypoint.parent.name: import_module(plugin_entrypoint.parent.name).METADATA
|
||||
plugin_entrypoint.parent.name: plugin_entrypoint.parent
|
||||
for plugin_entrypoint in plugins_dir.glob('*/apps.py')
|
||||
}
|
||||
|
||||
INSTALLED_PLUGINS = {
|
||||
**find_plugins(BUILTIN_PLUGINS_DIR),
|
||||
**find_plugins(USER_PLUGINS_DIR),
|
||||
}
|
||||
|
||||
|
||||
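For illustration, what find_plugins() above evaluates to for a hypothetical layout containing a single plugin; the plugin name is made up, and each resulting key is then appended to INSTALLED_APPS via *INSTALLED_PLUGINS.keys() below.

# hypothetical layout:  plugins/example_plugin/apps.py
demo_plugins = find_plugins(Path('plugins'))
# -> {'example_plugin': PosixPath('plugins/example_plugin')}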
INSTALLED_APPS = [
|
||||
'django.contrib.auth',
|
||||
'django.contrib.contenttypes',
|
||||
|
@@ -82,17 +59,8 @@ INSTALLED_APPS = [
'django.contrib.messages',
|
||||
'django.contrib.staticfiles',
|
||||
'django.contrib.admin',
|
||||
'django_jsonform',
|
||||
|
||||
'signal_webhooks',
|
||||
'abid_utils',
|
||||
'plugantic',
|
||||
'core',
|
||||
'api',
|
||||
|
||||
*INSTALLED_PLUGINS.keys(),
|
||||
|
||||
'admin_data_views',
|
||||
|
||||
'django_extensions',
|
||||
]
|
||||
|
@@ -204,17 +172,6 @@ if DEBUG_TOOLBAR:
]
|
||||
MIDDLEWARE = [*MIDDLEWARE, 'debug_toolbar.middleware.DebugToolbarMiddleware']
|
||||
|
||||
|
||||
# https://github.com/bensi94/Django-Requests-Tracker (improved version of django-debug-toolbar)
|
||||
# Must delete archivebox/templates/admin to use because it relies on some things we override
|
||||
# visit /__requests_tracker__/ to access
|
||||
DEBUG_REQUESTS_TRACKER = False
|
||||
if DEBUG_REQUESTS_TRACKER:
|
||||
INSTALLED_APPS += ["requests_tracker"]
|
||||
MIDDLEWARE += ["requests_tracker.middleware.requests_tracker_middleware"]
|
||||
INTERNAL_IPS = ["127.0.0.1", "10.0.2.2", "0.0.0.0", "*"]
|
||||
|
||||
|
||||
################################################################################
|
||||
### Staticfile and Template Settings
|
||||
################################################################################
|
||||
|
@@ -254,11 +211,6 @@ TEMPLATES = [
### External Service Settings
|
||||
################################################################################
|
||||
|
||||
|
||||
CACHE_DB_FILENAME = 'cache.sqlite3'
|
||||
CACHE_DB_PATH = CACHE_DIR / CACHE_DB_FILENAME
|
||||
CACHE_DB_TABLE = 'django_cache'
|
||||
|
||||
DATABASE_FILE = Path(OUTPUT_DIR) / SQL_INDEX_FILENAME
|
||||
DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", str(DATABASE_FILE))
|
||||
|
||||
|
@@ -272,56 +224,23 @@ DATABASES = {
},
|
||||
'TIME_ZONE': TIMEZONE,
|
||||
# DB setup is sometimes modified at runtime by setup_django() in config.py
|
||||
},
|
||||
# 'cache': {
|
||||
# 'ENGINE': 'django.db.backends.sqlite3',
|
||||
# 'NAME': CACHE_DB_PATH,
|
||||
# 'OPTIONS': {
|
||||
# 'timeout': 60,
|
||||
# 'check_same_thread': False,
|
||||
# },
|
||||
# 'TIME_ZONE': TIMEZONE,
|
||||
# },
|
||||
}
|
||||
}
|
||||
MIGRATION_MODULES = {'signal_webhooks': None}

# as much as I'd love this to be a UUID or ULID field, it's not supported yet as of Django 5.0
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'

CACHE_BACKEND = 'django.core.cache.backends.locmem.LocMemCache'
# CACHE_BACKEND = 'django.core.cache.backends.db.DatabaseCache'
# CACHE_BACKEND = 'django.core.cache.backends.dummy.DummyCache'

CACHES = {
    'default': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'},
    # 'sqlite': {'BACKEND': 'django.core.cache.backends.db.DatabaseCache', 'LOCATION': 'cache'},
    # 'dummy': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'},
    # 'filebased': {"BACKEND": "django.core.cache.backends.filebased.FileBasedCache", "LOCATION": CACHE_DIR / 'cache_filebased'},
    'default': {
        'BACKEND': CACHE_BACKEND,
        'LOCATION': 'django_cache_default',
    }
}
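Both sides of the diff configure a single in-process locmem cache, so cached values are private to each worker process and lost on restart. A minimal sketch of how code elsewhere in the project could use it (the key and value below are illustrative only):

from django.core.cache import cache

# stash a computed summary for 60 seconds under an arbitrary key
cache.set('snapshot-summary:1234', {'num_results': 7}, timeout=60)
summary = cache.get('snapshot-summary:1234')   # -> {'num_results': 7} until it expires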
EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'


STORAGES = {
    "default": {
        "BACKEND": "django.core.files.storage.FileSystemStorage",
    },
    "staticfiles": {
        "BACKEND": "django.contrib.staticfiles.storage.StaticFilesStorage",
    },
    "archive": {
        "BACKEND": "django.core.files.storage.FileSystemStorage",
        "OPTIONS": {
            "base_url": "/archive/",
            "location": ARCHIVE_DIR,
        },
    },
    # "personas": {
    #     "BACKEND": "django.core.files.storage.FileSystemStorage",
    #     "OPTIONS": {
    #         "base_url": "/personas/",
    #         "location": PERSONAS_DIR,
    #     },
    # },
}
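The extra "archive" storage maps files under ARCHIVE_DIR to URLs under /archive/. A minimal sketch of how it could be accessed via Django 4.2+'s storage registry (the snapshot filename below is made up for illustration):

from django.core.files.storage import storages

archive_storage = storages["archive"]
if archive_storage.exists("1718314200.123456/screenshot.png"):
    # resolves relative to ARCHIVE_DIR and returns a URL under base_url,
    # e.g. "/archive/1718314200.123456/screenshot.png"
    url = archive_storage.url("1718314200.123456/screenshot.png")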
|
||||
|
||||
################################################################################
|
||||
### Security Settings
|
||||
################################################################################
|
||||
|
@ -350,6 +269,9 @@ AUTH_PASSWORD_VALIDATORS = [
|
|||
{'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'},
|
||||
]
|
||||
|
||||
# WIP: broken by Django 3.1.2 -> 4.0 migration
|
||||
DEFAULT_AUTO_FIELD = 'django.db.models.UUIDField'
|
||||
|
||||
################################################################################
|
||||
### Shell Settings
|
||||
################################################################################
|
||||
|
@ -368,6 +290,7 @@ if IS_SHELL:
|
|||
|
||||
LANGUAGE_CODE = 'en-us'
|
||||
USE_I18N = True
|
||||
USE_L10N = True
|
||||
USE_TZ = True
|
||||
DATETIME_FORMAT = 'Y-m-d g:iA'
|
||||
SHORT_DATETIME_FORMAT = 'Y-m-d h:iA'
|
||||
|
@ -448,54 +371,3 @@ LOGGING = {
|
|||
}
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
# Add default webhook configuration to the User model
|
||||
SIGNAL_WEBHOOKS_CUSTOM_MODEL = 'api.models.OutboundWebhook'
|
||||
SIGNAL_WEBHOOKS = {
|
||||
"HOOKS": {
|
||||
# ... is a special sigil value that means "use the default autogenerated hooks"
|
||||
"django.contrib.auth.models.User": ...,
|
||||
"core.models.Snapshot": ...,
|
||||
"core.models.ArchiveResult": ...,
|
||||
"core.models.Tag": ...,
|
||||
"api.models.APIToken": ...,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
ADMIN_DATA_VIEWS = {
|
||||
"NAME": "Environment",
|
||||
"URLS": [
|
||||
{
|
||||
"route": "config/",
|
||||
"view": "core.views.live_config_list_view",
|
||||
"name": "Configuration",
|
||||
"items": {
|
||||
"route": "<str:key>/",
|
||||
"view": "core.views.live_config_value_view",
|
||||
"name": "config_val",
|
||||
},
|
||||
},
|
||||
{
|
||||
"route": "binaries/",
|
||||
"view": "plugantic.views.binaries_list_view",
|
||||
"name": "Binaries",
|
||||
"items": {
|
||||
"route": "<str:key>/",
|
||||
"view": "plugantic.views.binary_detail_view",
|
||||
"name": "binary",
|
||||
},
|
||||
},
|
||||
{
|
||||
"route": "plugins/",
|
||||
"view": "plugantic.views.plugins_list_view",
|
||||
"name": "Plugins",
|
||||
"items": {
|
||||
"route": "<str:key>/",
|
||||
"view": "plugantic.views.plugin_detail_view",
|
||||
"name": "plugin",
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
__package__ = 'archivebox.core'
|
||||
from django.contrib import admin
|
||||
|
||||
from django.urls import path, include
|
||||
from django.views import static
|
||||
|
@ -6,9 +6,7 @@ from django.contrib.staticfiles.urls import staticfiles_urlpatterns
|
|||
from django.conf import settings
|
||||
from django.views.generic.base import RedirectView
|
||||
|
||||
from .admin import archivebox_admin
|
||||
from .views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
|
||||
|
||||
from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
|
||||
|
||||
# GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306
|
||||
# from config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE
|
||||
|
@ -36,12 +34,13 @@ urlpatterns = [
|
|||
|
||||
|
||||
path('accounts/', include('django.contrib.auth.urls')),
|
||||
path('admin/', archivebox_admin.urls),
|
||||
path('admin/', admin.site.urls),
|
||||
|
||||
path("api/", include('api.urls')),
|
||||
# do not add extra_context like this as not all admin views (e.g. ModelAdmin.autocomplete_view accept extra kwargs)
|
||||
# path('admin/', admin.site.urls, {'extra_context': GLOBAL_CONTEXT}),
|
||||
|
||||
path('health/', HealthCheckView.as_view(), name='healthcheck'),
|
||||
path('error/', lambda *_: 1/0),
|
||||
path('error/', lambda _: 1/0),
|
||||
|
||||
# path('jet_api/', include('jet_django.urls')), Enable to use https://www.jetadmin.io/integrations/django
|
||||
|
||||
|
@ -52,10 +51,10 @@ urlpatterns = [
|
|||
urlpatterns += staticfiles_urlpatterns()
|
||||
|
||||
if settings.DEBUG_TOOLBAR:
|
||||
urlpatterns += [path('__debug__/', include("debug_toolbar.urls"))]
|
||||
|
||||
if settings.DEBUG_REQUESTS_TRACKER:
|
||||
urlpatterns += [path("__requests_tracker__/", include("requests_tracker.urls"))]
|
||||
import debug_toolbar
|
||||
urlpatterns += [
|
||||
path('__debug__/', include(debug_toolbar.urls)),
|
||||
]
|
||||
|
||||
|
||||
# # Proposed FUTURE URLs spec
|
||||
|
|
|
@ -1,13 +1,10 @@
|
|||
__package__ = 'archivebox.core'
|
||||
|
||||
from typing import Callable
|
||||
|
||||
from io import StringIO
|
||||
from pathlib import Path
|
||||
from contextlib import redirect_stdout
|
||||
|
||||
from django.shortcuts import render, redirect
|
||||
from django.http import HttpRequest, HttpResponse, Http404
|
||||
from django.http import HttpResponse, Http404
|
||||
from django.utils.html import format_html, mark_safe
|
||||
from django.views import View, static
|
||||
from django.views.generic.list import ListView
|
||||
|
@ -17,10 +14,6 @@ from django.contrib.auth.mixins import UserPassesTestMixin
|
|||
from django.views.decorators.csrf import csrf_exempt
|
||||
from django.utils.decorators import method_decorator
|
||||
|
||||
from admin_data_views.typing import TableContext, ItemContext
|
||||
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
|
||||
|
||||
|
||||
from core.models import Snapshot
|
||||
from core.forms import AddLinkForm
|
||||
|
||||
|
@ -33,18 +26,10 @@ from ..config import (
|
|||
COMMIT_HASH,
|
||||
FOOTER_INFO,
|
||||
SNAPSHOTS_PER_PAGE,
|
||||
CONFIG,
|
||||
CONFIG_SCHEMA,
|
||||
DYNAMIC_CONFIG_SCHEMA,
|
||||
USER_CONFIG,
|
||||
SAVE_ARCHIVE_DOT_ORG,
|
||||
PREVIEW_ORIGINALS,
|
||||
)
|
||||
from ..logging_util import printable_filesize
|
||||
from ..main import add
|
||||
from ..util import base_url, ansi_to_html, htmlencode, urldecode, urlencode, ts_to_date_str
|
||||
from ..util import base_url, ansi_to_html
|
||||
from ..search import query_search_index
|
||||
from ..extractors.wget import wget_output_path
|
||||
|
||||
|
||||
class HomepageView(View):
|
||||
|
@ -61,120 +46,10 @@ class HomepageView(View):
|
|||
class SnapshotView(View):
|
||||
# render static html index from filesystem archive/<timestamp>/index.html
|
||||
|
||||
@staticmethod
|
||||
def render_live_index(request, snapshot):
|
||||
TITLE_LOADING_MSG = 'Not yet archived...'
|
||||
HIDDEN_RESULTS = ('favicon', 'headers', 'title', 'htmltotext', 'warc', 'archive_org')
|
||||
|
||||
archiveresults = {}
|
||||
|
||||
results = snapshot.archiveresult_set.all()
|
||||
|
||||
for result in results:
|
||||
embed_path = result.embed_path()
|
||||
abs_path = result.snapshot_dir / (embed_path or 'None')
|
||||
|
||||
if (result.status == 'succeeded'
|
||||
and (result.extractor not in HIDDEN_RESULTS)
|
||||
and embed_path
|
||||
and abs_path.exists()):
|
||||
if abs_path.is_dir() and not any(abs_path.glob('*.*')):
|
||||
continue
|
||||
|
||||
result_info = {
|
||||
'name': result.extractor,
|
||||
'path': embed_path,
|
||||
'ts': ts_to_date_str(result.end_ts),
|
||||
'size': abs_path.stat().st_size or '?',
|
||||
}
|
||||
archiveresults[result.extractor] = result_info
|
||||
|
||||
existing_files = {result['path'] for result in archiveresults.values()}
|
||||
min_size_threshold = 10_000 # bytes
|
||||
allowed_extensions = {
|
||||
'txt',
|
||||
'html',
|
||||
'htm',
|
||||
'png',
|
||||
'jpg',
|
||||
'jpeg',
|
||||
'gif',
|
||||
'webp'
|
||||
'svg',
|
||||
'webm',
|
||||
'mp4',
|
||||
'mp3',
|
||||
'pdf',
|
||||
'md',
|
||||
}
|
||||
|
||||
|
||||
# iterate through all the files in the snapshot dir and add the biggest ones to the result list
|
||||
snap_dir = Path(snapshot.link_dir)
|
||||
for result_file in (*snap_dir.glob('*'), *snap_dir.glob('*/*')):
|
||||
extension = result_file.suffix.lstrip('.').lower()
|
||||
if result_file.is_dir() or result_file.name.startswith('.') or extension not in allowed_extensions:
|
||||
continue
|
||||
if result_file.name in existing_files or result_file.name == 'index.html':
|
||||
continue
|
||||
|
||||
file_size = result_file.stat().st_size or 0
|
||||
|
||||
if file_size > min_size_threshold:
|
||||
archiveresults[result_file.name] = {
|
||||
'name': result_file.stem,
|
||||
'path': result_file.relative_to(snap_dir),
|
||||
'ts': ts_to_date_str(result_file.stat().st_mtime or 0),
|
||||
'size': file_size,
|
||||
}
|
||||
|
||||
preferred_types = ('singlefile', 'wget', 'screenshot', 'dom', 'media', 'pdf', 'readability', 'mercury')
|
||||
all_types = preferred_types + tuple(result_type for result_type in archiveresults.keys() if result_type not in preferred_types)
|
||||
|
||||
best_result = {'path': 'None'}
|
||||
for result_type in preferred_types:
|
||||
if result_type in archiveresults:
|
||||
best_result = archiveresults[result_type]
|
||||
break
|
||||
|
||||
link = snapshot.as_link()
|
||||
|
||||
link_info = link._asdict(extended=True)
|
||||
|
||||
try:
|
||||
warc_path = 'warc/' + list(Path(snap_dir).glob('warc/*.warc.*'))[0].name
|
||||
except IndexError:
|
||||
warc_path = 'warc/'
|
||||
|
||||
context = {
|
||||
**link_info,
|
||||
**link_info['canonical'],
|
||||
'title': htmlencode(
|
||||
link.title
|
||||
or (link.base_url if link.is_archived else TITLE_LOADING_MSG)
|
||||
),
|
||||
'extension': link.extension or 'html',
|
||||
'tags': link.tags or 'untagged',
|
||||
'size': printable_filesize(link.archive_size) if link.archive_size else 'pending',
|
||||
'status': 'archived' if link.is_archived else 'not yet archived',
|
||||
'status_color': 'success' if link.is_archived else 'danger',
|
||||
'oldest_archive_date': ts_to_date_str(link.oldest_archive_date),
|
||||
'warc_path': warc_path,
|
||||
'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG,
|
||||
'PREVIEW_ORIGINALS': PREVIEW_ORIGINALS,
|
||||
'archiveresults': sorted(archiveresults.values(), key=lambda r: all_types.index(r['name']) if r['name'] in all_types else -r['size']),
|
||||
'best_result': best_result,
|
||||
# 'tags_str': 'somealskejrewlkrjwer,werlmwrwlekrjewlkrjwer324m532l,4m32,23m324234',
|
||||
}
|
||||
return render(template_name='core/snapshot_live.html', request=request, context=context)
|
||||
|
||||
|
||||
def get(self, request, path):
|
||||
if not request.user.is_authenticated and not PUBLIC_SNAPSHOTS:
|
||||
return redirect(f'/admin/login/?next={request.path}')
|
||||
|
||||
snapshot = None
|
||||
|
||||
try:
|
||||
slug, archivefile = path.split('/', 1)
|
||||
except (IndexError, ValueError):
|
||||
|
@ -190,11 +65,7 @@ class SnapshotView(View):
|
|||
try:
|
||||
try:
|
||||
snapshot = Snapshot.objects.get(Q(timestamp=slug) | Q(id__startswith=slug))
|
||||
if archivefile == 'index.html':
|
||||
# if they requested snapshot index, serve live rendered template instead of static html
|
||||
response = self.render_live_index(request, snapshot)
|
||||
else:
|
||||
response = static.serve(request, archivefile, document_root=snapshot.link_dir, show_indexes=True)
|
||||
response = static.serve(request, archivefile, document_root=snapshot.link_dir, show_indexes=True)
|
||||
response["Link"] = f'<{snapshot.url}>; rel="canonical"'
|
||||
return response
|
||||
except Snapshot.DoesNotExist:
|
||||
|
@ -246,33 +117,26 @@ class SnapshotView(View):
|
|||
status=404,
|
||||
)
|
||||
except Http404:
|
||||
assert snapshot # (Snapshot.DoesNotExist is already handled above)
|
||||
|
||||
# Snapshot dir exists but file within does not e.g. 124235.324234/screenshot.png
|
||||
return HttpResponse(
|
||||
format_html(
|
||||
(
|
||||
'<center><br/><br/><br/>'
|
||||
f'Snapshot <a href="/archive/{snapshot.timestamp}/index.html" target="_top"><b><code>[{snapshot.timestamp}]</code></b></a>: <a href="{snapshot.url}" target="_blank" rel="noreferrer">{snapshot.url}</a><br/>'
|
||||
f'was queued on {str(snapshot.added).split(".")[0]}, '
|
||||
f'but no files have been saved yet in:<br/><b><a href="/archive/{snapshot.timestamp}/" target="_top"><code>{snapshot.timestamp}</code></a><code>/'
|
||||
f'Snapshot <a href="/archive/{snapshot.timestamp}/index.html" target="_top"><b><code>[{snapshot.timestamp}]</code></b></a> exists in DB, but resource <b><code>{snapshot.timestamp}/'
|
||||
'{}'
|
||||
f'</code></b><br/><br/>'
|
||||
'It\'s possible {} '
|
||||
f'during the last capture on {str(snapshot.added).split(".")[0]},<br/>or that the archiving process has not completed yet.<br/>'
|
||||
f'<pre><code># run this cmd to finish/retry archiving this Snapshot</code><br/>'
|
||||
f'<code style="user-select: all; color: #333">archivebox update -t timestamp {snapshot.timestamp}</code></pre><br/><br/>'
|
||||
f'</code></b> does not exist in <a href="/archive/{snapshot.timestamp}/" target="_top">snapshot dir</a> yet.<br/><br/>'
|
||||
'Maybe this resource type is not available for this Snapshot,<br/>or the archiving process has not completed yet?<br/>'
|
||||
f'<pre><code># run this cmd to finish archiving this Snapshot<br/>archivebox update -t timestamp {snapshot.timestamp}</code></pre><br/><br/>'
|
||||
'<div class="text-align: left; width: 100%; max-width: 400px">'
|
||||
'<i><b>Next steps:</i></b><br/>'
|
||||
f'- list all the <a href="/archive/{snapshot.timestamp}/" target="_top">Snapshot files <code>.*</code></a><br/>'
|
||||
f'- view the <a href="/archive/{snapshot.timestamp}/index.html" target="_top">Snapshot <code>./index.html</code></a><br/>'
|
||||
f'- go to the <a href="/admin/core/snapshot/{snapshot.pk}/change/" target="_top">Snapshot admin</a> to edit<br/>'
|
||||
f'- go to the <a href="/admin/core/snapshot/?uuid__startswith={snapshot.uuid}" target="_top">Snapshot actions</a> to re-archive<br/>'
|
||||
f'- go to the <a href="/admin/core/snapshot/{snapshot.id}/change/" target="_top">Snapshot admin</a> to edit<br/>'
|
||||
f'- go to the <a href="/admin/core/snapshot/?id__startswith={snapshot.id}" target="_top">Snapshot actions</a> to re-archive<br/>'
|
||||
'- or return to <a href="/" target="_top">the main index...</a></div>'
|
||||
'</center>'
|
||||
),
|
||||
archivefile if str(archivefile) != 'None' else '',
|
||||
f'the {archivefile} resource could not be fetched' if str(archivefile) != 'None' else 'the original site was not available',
|
||||
archivefile,
|
||||
),
|
||||
content_type="text/html",
|
||||
status=404,
|
||||
|
@ -367,7 +231,7 @@ class PublicIndexView(ListView):
|
|||
qs = qs | query_search_index(query)
|
||||
except Exception as err:
|
||||
print(f'[!] Error while using search backend: {err.__class__.__name__} {err}')
|
||||
return qs.distinct()
|
||||
return qs
|
||||
|
||||
def get(self, *args, **kwargs):
|
||||
if PUBLIC_INDEX or self.request.user.is_authenticated:
|
||||
|
@ -448,124 +312,3 @@ class HealthCheckView(View):
|
|||
content_type='text/plain',
|
||||
status=200
|
||||
)
|
||||
|
||||
|
||||
def find_config_section(key: str) -> str:
|
||||
matching_sections = [
|
||||
name for name, opts in CONFIG_SCHEMA.items() if key in opts
|
||||
]
|
||||
section = matching_sections[0] if matching_sections else 'DYNAMIC'
|
||||
return section
|
||||
|
||||
def find_config_default(key: str) -> str:
|
||||
default_val = USER_CONFIG.get(key, {}).get('default', lambda: None)
|
||||
if isinstance(default_val, Callable):
|
||||
return None
|
||||
else:
|
||||
default_val = repr(default_val)
|
||||
return default_val
|
||||
|
||||
def find_config_type(key: str) -> str:
|
||||
if key in USER_CONFIG:
|
||||
return USER_CONFIG[key]['type'].__name__
|
||||
elif key in DYNAMIC_CONFIG_SCHEMA:
|
||||
return type(CONFIG[key]).__name__
|
||||
return 'str'
|
||||
|
||||
def key_is_safe(key: str) -> bool:
    for term in ('key', 'password', 'secret', 'token'):
        if term in key.lower():
            return False
    return True
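This check is what decides further down whether a config value is rendered in the admin table or replaced with '******** (redacted)'. For example (the key names here are just illustrative):

key_is_safe('SNAPSHOTS_PER_PAGE')    # -> True, the value is shown
key_is_safe('LDAP_BIND_PASSWORD')    # -> False, 'password' matches, so the value is redacted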
|
||||
|
||||
@render_with_table_view
|
||||
def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
|
||||
|
||||
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
|
||||
|
||||
rows = {
|
||||
"Section": [],
|
||||
"Key": [],
|
||||
"Type": [],
|
||||
"Value": [],
|
||||
"Default": [],
|
||||
# "Documentation": [],
|
||||
"Aliases": [],
|
||||
}
|
||||
|
||||
for section in CONFIG_SCHEMA.keys():
|
||||
for key in CONFIG_SCHEMA[section].keys():
|
||||
rows['Section'].append(section) # section.replace('_', ' ').title().replace(' Config', '')
|
||||
rows['Key'].append(ItemLink(key, key=key))
|
||||
rows['Type'].append(mark_safe(f'<code>{find_config_type(key)}</code>'))
|
||||
rows['Value'].append(mark_safe(f'<code>{CONFIG[key]}</code>') if key_is_safe(key) else '******** (redacted)')
|
||||
rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+%27{key}%27&type=code"><code style="text-decoration: underline">{find_config_default(key) or "See here..."}</code></a>'))
|
||||
# rows['Documentation'].append(mark_safe(f'Wiki: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">{key}</a>'))
|
||||
rows['Aliases'].append(', '.join(CONFIG_SCHEMA[section][key].get('aliases', [])))
|
||||
|
||||
section = 'DYNAMIC'
|
||||
for key in DYNAMIC_CONFIG_SCHEMA.keys():
|
||||
rows['Section'].append(section) # section.replace('_', ' ').title().replace(' Config', '')
|
||||
rows['Key'].append(ItemLink(key, key=key))
|
||||
rows['Type'].append(mark_safe(f'<code>{find_config_type(key)}</code>'))
|
||||
rows['Value'].append(mark_safe(f'<code>{CONFIG[key]}</code>') if key_is_safe(key) else '******** (redacted)')
|
||||
rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+%27{key}%27&type=code"><code style="text-decoration: underline">{find_config_default(key) or "See here..."}</code></a>'))
|
||||
# rows['Documentation'].append(mark_safe(f'Wiki: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">{key}</a>'))
|
||||
rows['Aliases'].append(ItemLink(key, key=key) if key in USER_CONFIG else '')
|
||||
|
||||
return TableContext(
|
||||
title="Computed Configuration Values",
|
||||
table=rows,
|
||||
)
|
||||
|
||||
@render_with_item_view
|
||||
def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
|
||||
|
||||
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
|
||||
|
||||
aliases = USER_CONFIG.get(key, {}).get("aliases", [])
|
||||
|
||||
return ItemContext(
|
||||
slug=key,
|
||||
title=key,
|
||||
data=[
|
||||
{
|
||||
"name": mark_safe(f'data / ArchiveBox.conf [{find_config_section(key)}] <b><code style="color: lightgray">{key}</code></b>' if key in USER_CONFIG else f'[DYNAMIC CONFIG] <b><code style="color: lightgray">{key}</code></b> <small>(calculated at runtime)</small>'),
|
||||
"description": None,
|
||||
"fields": {
|
||||
'Key': key,
|
||||
'Type': find_config_type(key),
|
||||
'Value': CONFIG[key] if key_is_safe(key) else '********',
|
||||
},
|
||||
"help_texts": {
|
||||
'Key': mark_safe(f'''
|
||||
<a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">Documentation</a>
|
||||
<span style="display: {"inline" if aliases else "none"}">
|
||||
Aliases: {", ".join(aliases)}
|
||||
</span>
|
||||
'''),
|
||||
'Type': mark_safe(f'''
|
||||
<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+%27{key}%27&type=code">
|
||||
See full definition in <code>archivebox/config.py</code>...
|
||||
</a>
|
||||
'''),
|
||||
'Value': mark_safe(f'''
|
||||
{'<b style="color: red">Value is redacted for your security. (Passwords, secrets, API tokens, etc. cannot be viewed in the Web UI)</b><br/><br/>' if not key_is_safe(key) else ''}
|
||||
Default: <a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+%27{key}%27&type=code">
|
||||
<code>{find_config_default(key) or 'See here...'}</code>
|
||||
</a>
|
||||
<br/><br/>
|
||||
<p style="display: {"block" if key in USER_CONFIG else "none"}">
|
||||
<i>To change this value, edit <code>data/ArchiveBox.conf</code> or run:</i>
|
||||
<br/><br/>
|
||||
<code>archivebox config --set {key}="{
|
||||
val.strip("'")
|
||||
if (val := find_config_default(key)) else
|
||||
(repr(CONFIG[key] if key_is_safe(key) else '********')).strip("'")
|
||||
}"</code>
|
||||
</p>
|
||||
'''),
|
||||
},
|
||||
},
|
||||
],
|
||||
)
|
||||
|
|
|
@ -1,13 +1,11 @@
|
|||
__package__ = 'archivebox.extractors'
|
||||
|
||||
from typing import Callable, Optional, Dict, List, Iterable, Union, Protocol, cast
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from importlib import import_module
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from typing import Callable, Optional, List, Iterable, Union
|
||||
from datetime import datetime, timezone
|
||||
from django.db.models import QuerySet
|
||||
|
||||
from ..config import (
|
||||
|
@ -133,7 +131,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
|
|||
|
||||
link = load_link_details(link, out_dir=out_dir)
|
||||
write_link_details(link, out_dir=out_dir, skip_sql_index=False)
|
||||
log_link_archiving_started(link, str(out_dir), is_new)
|
||||
log_link_archiving_started(link, out_dir, is_new)
|
||||
link = link.overwrite(updated=datetime.now(timezone.utc))
|
||||
stats = {'skipped': 0, 'succeeded': 0, 'failed': 0}
|
||||
start_ts = datetime.now(timezone.utc)
|
||||
|
@ -160,13 +158,23 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
|
|||
# bump the updated time on the main Snapshot here, this is critical
|
||||
# to be able to cache summaries of the ArchiveResults for a given
|
||||
# snapshot without having to load all the results from the DB each time.
|
||||
# (we use {Snapshot.pk}-{Snapshot.updated} as the cache key and assume
|
||||
# (we use {Snapshot.id}-{Snapshot.updated} as the cache key and assume
|
||||
# ArchiveResults are unchanged as long as the updated timestamp is unchanged)
|
||||
snapshot.save()
|
||||
else:
|
||||
# print('{black} X {}{reset}'.format(method_name, **ANSI))
|
||||
stats['skipped'] += 1
|
||||
except Exception as e:
|
||||
# Disabled until https://github.com/ArchiveBox/ArchiveBox/issues/984
|
||||
# and https://github.com/ArchiveBox/ArchiveBox/issues/1014
|
||||
# are fixed.
|
||||
"""
|
||||
raise Exception('Exception in archive_methods.save_{}(Link(url={}))'.format(
|
||||
method_name,
|
||||
link.url,
|
||||
)) from e
|
||||
"""
|
||||
# Instead, use the kludgy workaround from
|
||||
# https://github.com/ArchiveBox/ArchiveBox/issues/984#issuecomment-1150541627
|
||||
with open(ERROR_LOG, "a", encoding='utf-8') as f:
|
||||
command = ' '.join(sys.argv)
|
||||
|
@ -179,13 +187,6 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
|
|||
) + "\n" + str(e) + "\n"))
|
||||
#f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n")
|
||||
|
||||
# print(f' ERROR: {method_name} {e.__class__.__name__}: {e} {getattr(e, "hints", "")}', ts, link.url, command)
|
||||
raise Exception('Exception in archive_methods.save_{}(Link(url={}))'.format(
|
||||
method_name,
|
||||
link.url,
|
||||
)) from e
|
||||
|
||||
|
||||
# print(' ', stats)
|
||||
|
||||
try:
|
||||
|
@ -217,7 +218,7 @@ def archive_links(all_links: Union[Iterable[Link], QuerySet], overwrite: bool=Fa
|
|||
|
||||
if type(all_links) is QuerySet:
|
||||
num_links: int = all_links.count()
|
||||
get_link = lambda x: x.as_link_with_details()
|
||||
get_link = lambda x: x.as_link()
|
||||
all_links = all_links.iterator()
|
||||
else:
|
||||
num_links: int = len(all_links)
|
||||
|
@ -242,37 +243,3 @@ def archive_links(all_links: Union[Iterable[Link], QuerySet], overwrite: bool=Fa
|
|||
|
||||
log_archiving_finished(num_links)
|
||||
return all_links
|
||||
|
||||
|
||||
|
||||
EXTRACTORS_DIR = Path(__file__).parent

class ExtractorModuleProtocol(Protocol):
    """Type interface for an Extractor Module (WIP)"""

    get_output_path: Callable

    # TODO:
    # get_embed_path: Callable | None
    # should_extract(Snapshot)
    # extract(Snapshot)


def get_extractors(dir: Path=EXTRACTORS_DIR) -> Dict[str, ExtractorModuleProtocol]:
    """iterate through archivebox/extractors/*.py and load extractor modules"""
    EXTRACTORS = {}

    for filename in EXTRACTORS_DIR.glob('*.py'):
        if filename.name.startswith('__'):
            continue

        extractor_name = filename.name.replace('.py', '')

        extractor_module = cast(ExtractorModuleProtocol, import_module(f'.{extractor_name}', package=__package__))

        assert getattr(extractor_module, 'get_output_path')
        EXTRACTORS[extractor_name] = extractor_module

    return EXTRACTORS

EXTRACTORS = get_extractors(EXTRACTORS_DIR)
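Every .py file in archivebox/extractors/ is imported by name, and get_output_path() is the only attribute the loader asserts on. A minimal skeleton for a hypothetical new extractor module (the name notes.py and its return value are made up for illustration, not part of this diff):

# archivebox/extractors/notes.py   (hypothetical example)

def get_output_path():
    # relative path inside the snapshot dir where this extractor writes its output
    return 'notes.txt'

# by convention the real extractors also define should_save_<name>() and save_<name>(),
# but get_extractors() above only requires get_output_path() to be present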
|
||||
|
|
|
@ -10,12 +10,10 @@ from ..system import run, chmod_file
|
|||
from ..util import (
|
||||
enforce_types,
|
||||
is_static_file,
|
||||
dedupe,
|
||||
)
|
||||
from ..config import (
|
||||
TIMEOUT,
|
||||
CURL_ARGS,
|
||||
CURL_EXTRA_ARGS,
|
||||
CHECK_SSL_VALIDITY,
|
||||
SAVE_ARCHIVE_DOT_ORG,
|
||||
CURL_BINARY,
|
||||
|
@ -24,8 +22,6 @@ from ..config import (
|
|||
)
|
||||
from ..logging_util import TimedProgress
|
||||
|
||||
def get_output_path():
|
||||
return 'archive.org.txt'
|
||||
|
||||
|
||||
@enforce_types
|
||||
|
@ -34,7 +30,7 @@ def should_save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, overwr
|
|||
return False
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
if not overwrite and (out_dir / get_output_path()).exists():
|
||||
if not overwrite and (out_dir / 'archive.org.txt').exists():
|
||||
# if open(path, 'r', encoding='utf-8').read().strip() != 'None':
|
||||
return False
|
||||
|
||||
|
@ -45,21 +41,16 @@ def save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, timeout: int=
|
|||
"""submit site to archive.org for archiving via their service, save returned archive url"""
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
output: ArchiveOutput = get_output_path()
output: ArchiveOutput = 'archive.org.txt'
archive_org_url = None
submit_url = 'https://web.archive.org/save/{}'.format(link.url)
# later options take precedence
options = [
cmd = [
CURL_BINARY,
*CURL_ARGS,
*CURL_EXTRA_ARGS,
'--head',
'--max-time', str(timeout),
*(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
*([] if CHECK_SSL_VALIDITY else ['--insecure']),
]
cmd = [
CURL_BINARY,
*dedupe(options),
submit_url,
]
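The new options/dedupe(options) split replaces the old hand-built cmd list: defaults from CURL_ARGS come first and anything appended later (e.g. CURL_EXTRA_ARGS) can override them, per the "later options take precedence" comment. A rough sketch of what a last-one-wins dedupe helper could behave like (an illustration of the idea only, not necessarily the exact implementation in archivebox/util.py):

def dedupe(options):
    # keep only the last occurrence of each option, keyed by the part before '='
    deduped = {}
    for option in options:
        deduped[option.split('=')[0]] = option
    return list(deduped.values())

# dedupe(['--timeout=60', '--insecure', '--timeout=120']) -> ['--timeout=120', '--insecure']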
|
||||
status = 'succeeded'
|
||||
|
@ -90,7 +81,7 @@ def save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, timeout: int=
|
|||
archive_org_url = archive_org_url or submit_url
|
||||
with open(str(out_dir / output), 'w', encoding='utf-8') as f:
|
||||
f.write(archive_org_url)
|
||||
chmod_file(str(out_dir / output), cwd=str(out_dir))
|
||||
chmod_file('archive.org.txt', cwd=str(out_dir))
|
||||
output = archive_org_url
|
||||
|
||||
return ArchiveResult(
|
||||
|
|
|
@ -19,9 +19,6 @@ from ..config import (
|
|||
from ..logging_util import TimedProgress
|
||||
|
||||
|
||||
def get_output_path():
|
||||
return 'output.html'
|
||||
|
||||
|
||||
@enforce_types
|
||||
def should_save_dom(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
||||
|
@ -29,8 +26,8 @@ def should_save_dom(link: Link, out_dir: Optional[Path]=None, overwrite: Optiona
|
|||
return False
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
if not overwrite and (out_dir / get_output_path()).exists():
|
||||
if (out_dir / get_output_path()).stat().st_size > 1:
|
||||
if not overwrite and (out_dir / 'output.html').exists():
|
||||
if (out_dir / 'output.html').stat().st_size > 1:
|
||||
return False
|
||||
|
||||
return SAVE_DOM
|
||||
|
@ -40,7 +37,7 @@ def save_dom(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
|
|||
"""print HTML of site to file using chrome --dump-html"""
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
output: ArchiveOutput = get_output_path()
|
||||
output: ArchiveOutput = 'output.html'
|
||||
output_path = out_dir / output
|
||||
cmd = [
|
||||
*chrome_args(),
|
||||
|
|
|
@ -6,18 +6,13 @@ from typing import Optional
|
|||
|
||||
from ..index.schema import Link, ArchiveResult, ArchiveOutput
|
||||
from ..system import chmod_file, run
|
||||
from ..util import (
|
||||
enforce_types,
|
||||
domain,
|
||||
dedupe,
|
||||
)
|
||||
from ..util import enforce_types, domain
|
||||
from ..config import (
|
||||
TIMEOUT,
|
||||
SAVE_FAVICON,
|
||||
FAVICON_PROVIDER,
|
||||
CURL_BINARY,
|
||||
CURL_ARGS,
|
||||
CURL_EXTRA_ARGS,
|
||||
CURL_VERSION,
|
||||
CHECK_SSL_VALIDITY,
|
||||
CURL_USER_AGENT,
|
||||
|
@ -33,29 +28,19 @@ def should_save_favicon(link: Link, out_dir: Optional[str]=None, overwrite: Opti
|
|||
|
||||
return SAVE_FAVICON
|
||||
|
||||
@enforce_types
|
||||
def get_output_path():
|
||||
return 'favicon.ico'
|
||||
|
||||
|
||||
@enforce_types
|
||||
def save_favicon(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
|
||||
"""download site favicon from google's favicon api"""
|
||||
|
||||
out_dir = out_dir or link.link_dir
|
||||
output: ArchiveOutput = 'favicon.ico'
|
||||
# later options take precedence
|
||||
options = [
|
||||
cmd = [
|
||||
CURL_BINARY,
|
||||
*CURL_ARGS,
|
||||
*CURL_EXTRA_ARGS,
|
||||
'--max-time', str(timeout),
|
||||
'--output', str(output),
|
||||
*(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
|
||||
*([] if CHECK_SSL_VALIDITY else ['--insecure']),
|
||||
]
|
||||
cmd = [
|
||||
CURL_BINARY,
|
||||
*dedupe(options),
|
||||
FAVICON_PROVIDER.format(domain(link.url)),
|
||||
]
|
||||
status = 'failed'
|
||||
|
|
|
@ -26,19 +26,6 @@ from ..config import (
|
|||
from ..logging_util import TimedProgress
|
||||
|
||||
|
||||
def get_output_path():
|
||||
return 'git/'
|
||||
|
||||
def get_embed_path(archiveresult=None):
|
||||
if not archiveresult:
|
||||
return get_output_path()
|
||||
|
||||
try:
|
||||
return get_output_path() + list((archiveresult.snapshot_dir / get_output_path()).glob('*'))[0].name + '/'
|
||||
except IndexError:
|
||||
pass
|
||||
|
||||
return get_output_path()
|
||||
|
||||
@enforce_types
|
||||
def should_save_git(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
||||
|
@ -46,7 +33,7 @@ def should_save_git(link: Link, out_dir: Optional[Path]=None, overwrite: Optiona
|
|||
return False
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
if not overwrite and (out_dir / get_output_path()).exists():
|
||||
if not overwrite and (out_dir / 'git').exists():
|
||||
return False
|
||||
|
||||
is_clonable_url = (
|
||||
|
@ -64,7 +51,7 @@ def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
|
|||
"""download full site using git"""
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
output: ArchiveOutput = get_output_path()
|
||||
output: ArchiveOutput = 'git'
|
||||
output_path = out_dir / output
|
||||
output_path.mkdir(exist_ok=True)
|
||||
cmd = [
|
||||
|
|
|
@ -9,13 +9,11 @@ from ..system import atomic_write
|
|||
from ..util import (
|
||||
enforce_types,
|
||||
get_headers,
|
||||
dedupe,
|
||||
)
|
||||
from ..config import (
|
||||
TIMEOUT,
|
||||
CURL_BINARY,
|
||||
CURL_ARGS,
|
||||
CURL_EXTRA_ARGS,
|
||||
CURL_USER_AGENT,
|
||||
CURL_VERSION,
|
||||
CHECK_SSL_VALIDITY,
|
||||
|
@ -23,14 +21,10 @@ from ..config import (
|
|||
)
|
||||
from ..logging_util import TimedProgress
|
||||
|
||||
def get_output_path():
|
||||
return 'headers.json'
|
||||
|
||||
|
||||
@enforce_types
|
||||
def should_save_headers(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
if not overwrite and (out_dir / get_output_path()).exists():
|
||||
if not overwrite and (out_dir / 'headers.json').exists():
|
||||
return False
|
||||
|
||||
return SAVE_HEADERS
|
||||
|
@ -42,28 +36,24 @@ def save_headers(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT)
|
|||
|
||||
out_dir = Path(out_dir or link.link_dir)
|
||||
output_folder = out_dir.absolute()
|
||||
output: ArchiveOutput = get_output_path()
|
||||
output: ArchiveOutput = 'headers.json'
|
||||
|
||||
status = 'succeeded'
|
||||
timer = TimedProgress(timeout, prefix=' ')
|
||||
# later options take precedence
|
||||
options = [
|
||||
|
||||
cmd = [
|
||||
CURL_BINARY,
|
||||
*CURL_ARGS,
|
||||
*CURL_EXTRA_ARGS,
|
||||
'--head',
|
||||
'--max-time', str(timeout),
|
||||
*(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
|
||||
*([] if CHECK_SSL_VALIDITY else ['--insecure']),
|
||||
]
|
||||
cmd = [
|
||||
CURL_BINARY,
|
||||
*dedupe(options),
|
||||
link.url,
|
||||
]
|
||||
try:
|
||||
json_headers = get_headers(link.url, timeout=timeout)
|
||||
output_folder.mkdir(exist_ok=True)
|
||||
atomic_write(str(output_folder / get_output_path()), json_headers)
|
||||
atomic_write(str(output_folder / "headers.json"), json_headers)
|
||||
except (Exception, OSError) as err:
|
||||
status = 'failed'
|
||||
output = err
|
||||
|
|
|
@ -19,12 +19,6 @@ from ..util import (
|
|||
)
|
||||
from .title import get_html
|
||||
|
||||
|
||||
def get_output_path():
|
||||
return "htmltotext.txt"
|
||||
|
||||
|
||||
|
||||
class HTMLTextExtractor(HTMLParser):
|
||||
TEXT_ATTRS = [
|
||||
"alt", "cite", "href", "label",
|
||||
|
@ -115,7 +109,7 @@ def should_save_htmltotext(link: Link, out_dir: Optional[Path]=None, overwrite:
|
|||
return False
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
if not overwrite and (out_dir / get_output_path()).exists():
|
||||
if not overwrite and (out_dir / 'htmltotext.txt').exists():
|
||||
return False
|
||||
|
||||
return SAVE_HTMLTOTEXT
|
||||
|
@ -126,12 +120,10 @@ def save_htmltotext(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
|
|||
"""extract search-indexing-friendly text from an HTML document"""
|
||||
|
||||
out_dir = Path(out_dir or link.link_dir)
|
||||
output = get_output_path()
|
||||
cmd = ['(internal) archivebox.extractors.htmltotext', './{singlefile,dom}.html']
|
||||
output = "htmltotext.txt"
|
||||
|
||||
timer = TimedProgress(timeout, prefix=' ')
|
||||
extracted_text = None
|
||||
status = 'failed'
|
||||
try:
|
||||
extractor = HTMLTextExtractor()
|
||||
document = get_html(link, out_dir)
|
||||
|
@ -144,9 +136,10 @@ def save_htmltotext(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
|
|||
extracted_text = str(extractor)
|
||||
|
||||
atomic_write(str(out_dir / output), extracted_text)
|
||||
status = 'succeeded'
|
||||
except (Exception, OSError) as err:
|
||||
status = 'failed'
|
||||
output = err
|
||||
cmd = ['(internal) archivebox.extractors.htmltotext', './{singlefile,dom}.html']
|
||||
finally:
|
||||
timer.end()
|
||||
|
||||
|
|
|
@ -8,13 +8,11 @@ from ..system import run, chmod_file
|
|||
from ..util import (
|
||||
enforce_types,
|
||||
is_static_file,
|
||||
dedupe,
|
||||
)
|
||||
from ..config import (
|
||||
MEDIA_TIMEOUT,
|
||||
SAVE_MEDIA,
|
||||
YOUTUBEDL_ARGS,
|
||||
YOUTUBEDL_EXTRA_ARGS,
|
||||
YOUTUBEDL_BINARY,
|
||||
YOUTUBEDL_VERSION,
|
||||
CHECK_SSL_VALIDITY
|
||||
|
@ -22,27 +20,13 @@ from ..config import (
|
|||
from ..logging_util import TimedProgress
|
||||
|
||||
|
||||
def get_output_path():
|
||||
return 'media/'
|
||||
|
||||
def get_embed_path(archiveresult=None):
|
||||
if not archiveresult:
|
||||
return get_output_path()
|
||||
|
||||
out_dir = archiveresult.snapshot_dir / get_output_path()
|
||||
try:
|
||||
return get_output_path() + list(out_dir.glob('*.mp4'))[0].name
|
||||
except IndexError:
|
||||
return get_output_path()
|
||||
|
||||
|
||||
@enforce_types
|
||||
def should_save_media(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
||||
if is_static_file(link.url):
|
||||
return False
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
if not overwrite and (out_dir / get_output_path()).exists():
|
||||
if not overwrite and (out_dir / 'media').exists():
|
||||
return False
|
||||
|
||||
return SAVE_MEDIA
|
||||
|
@ -52,19 +36,14 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME
|
|||
"""Download playlists or individual video, audio, and subtitles using youtube-dl or yt-dlp"""
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
output: ArchiveOutput = get_output_path()
|
||||
output: ArchiveOutput = 'media'
|
||||
output_path = out_dir / output
|
||||
output_path.mkdir(exist_ok=True)
|
||||
# later options take precedence
|
||||
options = [
|
||||
*YOUTUBEDL_ARGS,
|
||||
*YOUTUBEDL_EXTRA_ARGS,
|
||||
*([] if CHECK_SSL_VALIDITY else ['--no-check-certificate']),
|
||||
# TODO: add --cookies-from-browser={CHROME_USER_DATA_DIR}
|
||||
]
|
||||
cmd = [
|
||||
YOUTUBEDL_BINARY,
|
||||
*dedupe(options),
|
||||
*YOUTUBEDL_ARGS,
|
||||
*([] if CHECK_SSL_VALIDITY else ['--no-check-certificate']),
|
||||
# TODO: add --cookies-from-browser={CHROME_USER_DATA_DIR}
|
||||
link.url,
|
||||
]
|
||||
status = 'succeeded'
|
||||
|
|
|
@ -11,25 +11,17 @@ from ..system import run, atomic_write
|
|||
from ..util import (
|
||||
enforce_types,
|
||||
is_static_file,
|
||||
dedupe,
|
||||
|
||||
)
|
||||
from ..config import (
|
||||
TIMEOUT,
|
||||
SAVE_MERCURY,
|
||||
DEPENDENCIES,
|
||||
MERCURY_VERSION,
|
||||
MERCURY_ARGS,
|
||||
MERCURY_EXTRA_ARGS,
|
||||
)
|
||||
from ..logging_util import TimedProgress
|
||||
|
||||
|
||||
def get_output_path():
|
||||
return 'mercury/'
|
||||
|
||||
def get_embed_path(archiveresult=None):
|
||||
return get_output_path() + 'content.html'
|
||||
|
||||
|
||||
@enforce_types
|
||||
def ShellError(cmd: List[str], result: CompletedProcess, lines: int=20) -> ArchiveError:
|
||||
|
@ -50,7 +42,7 @@ def should_save_mercury(link: Link, out_dir: Optional[str]=None, overwrite: Opti
|
|||
return False
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
if not overwrite and (out_dir / get_output_path()).exists():
|
||||
if not overwrite and (out_dir / 'mercury').exists():
|
||||
return False
|
||||
|
||||
return SAVE_MERCURY
|
||||
|
@ -61,23 +53,19 @@ def save_mercury(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT)
|
|||
"""download reader friendly version using @postlight/mercury-parser"""
|
||||
|
||||
out_dir = Path(out_dir or link.link_dir)
|
||||
output_folder = out_dir.absolute() / get_output_path()
|
||||
output = get_output_path()
|
||||
output_folder = out_dir.absolute() / "mercury"
|
||||
output = "mercury"
|
||||
|
||||
status = 'succeeded'
|
||||
timer = TimedProgress(timeout, prefix=' ')
|
||||
try:
|
||||
output_folder.mkdir(exist_ok=True)
|
||||
# later options take precedence
|
||||
options = [
|
||||
*MERCURY_ARGS,
|
||||
*MERCURY_EXTRA_ARGS,
|
||||
]
|
||||
# By default, get plain text version of article
|
||||
|
||||
# Get plain text version of article
|
||||
cmd = [
|
||||
DEPENDENCIES['MERCURY_BINARY']['path'],
|
||||
link.url,
|
||||
*dedupe(options)
|
||||
"--format=text"
|
||||
]
|
||||
result = run(cmd, cwd=out_dir, timeout=timeout)
|
||||
try:
|
||||
|
|
|
@ -19,17 +19,13 @@ from ..config import (
|
|||
from ..logging_util import TimedProgress
|
||||
|
||||
|
||||
def get_output_path():
|
||||
return 'output.pdf'
|
||||
|
||||
|
||||
@enforce_types
|
||||
def should_save_pdf(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
||||
if is_static_file(link.url):
|
||||
return False
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
if not overwrite and (out_dir / get_output_path()).exists():
|
||||
if not overwrite and (out_dir / 'output.pdf').exists():
|
||||
return False
|
||||
|
||||
return SAVE_PDF
|
||||
|
@ -40,7 +36,7 @@ def save_pdf(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
|
|||
"""print PDF of site to file using chrome --headless"""
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
output: ArchiveOutput = get_output_path()
|
||||
output: ArchiveOutput = 'output.pdf'
|
||||
cmd = [
|
||||
*chrome_args(),
|
||||
'--print-to-pdf',
|
||||
|
@ -55,7 +51,7 @@ def save_pdf(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
|
|||
hints = (result.stderr or result.stdout).decode()
|
||||
raise ArchiveError('Failed to save PDF', hints)
|
||||
|
||||
chmod_file(get_output_path(), cwd=str(out_dir))
|
||||
chmod_file('output.pdf', cwd=str(out_dir))
|
||||
except Exception as err:
|
||||
status = 'failed'
|
||||
output = err
|
||||
|
|
|
@ -22,12 +22,6 @@ from ..config import (
|
|||
from ..logging_util import TimedProgress
|
||||
from .title import get_html
|
||||
|
||||
def get_output_path():
|
||||
return 'readability/'
|
||||
|
||||
def get_embed_path(archiveresult=None):
|
||||
return get_output_path() + 'content.html'
|
||||
|
||||
|
||||
@enforce_types
|
||||
def should_save_readability(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
|
||||
|
@ -35,7 +29,7 @@ def should_save_readability(link: Link, out_dir: Optional[str]=None, overwrite:
|
|||
return False
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
if not overwrite and (out_dir / get_output_path()).exists():
|
||||
if not overwrite and (out_dir / 'readability').exists():
|
||||
return False
|
||||
|
||||
return SAVE_READABILITY
|
||||
|
@ -46,8 +40,8 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
|
|||
"""download reader friendly version using @mozilla/readability"""
|
||||
|
||||
out_dir = Path(out_dir or link.link_dir)
|
||||
output_folder = out_dir.absolute() / get_output_path()
|
||||
output = get_output_path()
|
||||
output_folder = out_dir.absolute() / "readability"
|
||||
output = "readability"
|
||||
|
||||
# Readability Docs: https://github.com/mozilla/readability
|
||||
|
||||
|
|
|
@ -19,9 +19,6 @@ from ..config import (
|
|||
from ..logging_util import TimedProgress
|
||||
|
||||
|
||||
def get_output_path():
|
||||
return 'screenshot.png'
|
||||
|
||||
|
||||
@enforce_types
|
||||
def should_save_screenshot(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
||||
|
@ -29,7 +26,7 @@ def should_save_screenshot(link: Link, out_dir: Optional[Path]=None, overwrite:
|
|||
return False
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
if not overwrite and (out_dir / get_output_path()).exists():
|
||||
if not overwrite and (out_dir / 'screenshot.png').exists():
|
||||
return False
|
||||
|
||||
return SAVE_SCREENSHOT
|
||||
|
@ -39,7 +36,7 @@ def save_screenshot(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
|
|||
"""take screenshot of site using chrome --headless"""
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
output: ArchiveOutput = get_output_path()
|
||||
output: ArchiveOutput = 'screenshot.png'
|
||||
cmd = [
|
||||
*chrome_args(),
|
||||
'--screenshot',
|
||||
|
|
|
@ -11,7 +11,6 @@ from ..util import (
|
|||
enforce_types,
|
||||
is_static_file,
|
||||
chrome_args,
|
||||
dedupe,
|
||||
)
|
||||
from ..config import (
|
||||
TIMEOUT,
|
||||
|
@ -19,24 +18,18 @@ from ..config import (
|
|||
DEPENDENCIES,
|
||||
SINGLEFILE_VERSION,
|
||||
SINGLEFILE_ARGS,
|
||||
SINGLEFILE_EXTRA_ARGS,
|
||||
CHROME_BINARY,
|
||||
COOKIES_FILE,
|
||||
)
|
||||
from ..logging_util import TimedProgress
|
||||
|
||||
|
||||
def get_output_path():
|
||||
return 'singlefile.html'
|
||||
|
||||
|
||||
@enforce_types
|
||||
def should_save_singlefile(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
||||
if is_static_file(link.url):
|
||||
return False
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
if not overwrite and (out_dir / get_output_path()).exists():
|
||||
if not overwrite and (out_dir / 'singlefile.html').exists():
|
||||
return False
|
||||
|
||||
return SAVE_SINGLEFILE
|
||||
|
@ -47,30 +40,43 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
|
|||
"""download full site using single-file"""
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
output = get_output_path()
|
||||
output = "singlefile.html"
|
||||
|
||||
browser_args = chrome_args(CHROME_TIMEOUT=0)
|
||||
|
||||
# SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli
|
||||
browser_args = '--browser-args={}'.format(json.dumps(browser_args[1:]))
|
||||
# later options take precedence
|
||||
options = [
|
||||
'--browser-executable-path={}'.format(CHROME_BINARY),
|
||||
*(["--browser-cookies-file={}".format(COOKIES_FILE)] if COOKIES_FILE else []),
|
||||
browser_args,
|
||||
*SINGLEFILE_ARGS,
|
||||
*SINGLEFILE_EXTRA_ARGS,
|
||||
'--browser-executable-path={}'.format(CHROME_BINARY),
|
||||
browser_args,
|
||||
]
|
||||
|
||||
    # Deduplicate options (single-file doesn't like when you use the same option two times)
    #
    # NOTE: Option names that come first clobber conflicting names that come later
    # My logic is SINGLEFILE_ARGS is the option that affects the singlefile command with the most
    # specificity, therefore the user sets it with a lot of intent, therefore it should take precedence
    # kind of like the ergonomic principle of lexical scope in programming languages.
    seen_option_names = []
    def test_seen(argument):
        option_name = argument.split("=")[0]
        if option_name in seen_option_names:
            return False
        else:
            seen_option_names.append(option_name)
            return True
    deduped_options = list(filter(test_seen, options))
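Unlike the dedupe() helper used on the dev side, this older filter is first-one-wins: once an option name has been seen, later occurrences are dropped. For example (the flag values here are made up):

options = [
    '--browser-executable-path=/usr/bin/chromium',
    '--dump-content=false',
    '--browser-executable-path=/opt/chrome',   # conflicts with the first entry
]
# list(filter(test_seen, options))
# -> ['--browser-executable-path=/usr/bin/chromium', '--dump-content=false']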
|
||||
|
||||
cmd = [
|
||||
DEPENDENCIES['SINGLEFILE_BINARY']['path'],
|
||||
*dedupe(options),
|
||||
*deduped_options,
|
||||
link.url,
|
||||
output,
|
||||
]
|
||||
|
||||
status = 'succeeded'
|
||||
timer = TimedProgress(timeout, prefix=' ')
|
||||
result = None
|
||||
try:
|
||||
result = run(cmd, cwd=str(out_dir), timeout=timeout)
|
||||
|
||||
|
@ -78,7 +84,7 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
|
|||
# "Downloaded: 76 files, 4.0M in 1.6s (2.52 MB/s)"
|
||||
output_tail = [
|
||||
line.strip()
|
||||
for line in (result.stdout + result.stderr).decode().rsplit('\n', 5)[-5:]
|
||||
for line in (result.stdout + result.stderr).decode().rsplit('\n', 3)[-3:]
|
||||
if line.strip()
|
||||
]
|
||||
hints = (
|
||||
|
@ -88,14 +94,12 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
|
|||
|
||||
# Check for common failure cases
|
||||
if (result.returncode > 0) or not (out_dir / output).is_file():
|
||||
raise ArchiveError(f'SingleFile was not able to archive the page (status={result.returncode})', hints)
|
||||
raise ArchiveError('SingleFile was not able to archive the page', hints)
|
||||
chmod_file(output, cwd=str(out_dir))
|
||||
except (Exception, OSError) as err:
|
||||
status = 'failed'
|
||||
# TODO: Make this prettier. This is necessary to run the command (escape JSON internal quotes).
|
||||
cmd[2] = browser_args.replace('"', "\\\"")
|
||||
if result:
|
||||
err.hints = (result.stdout + result.stderr).decode().split('\n')
|
||||
output = err
|
||||
finally:
|
||||
timer.end()
|
||||
|
|
|
@ -10,7 +10,6 @@ from ..util import (
|
|||
enforce_types,
|
||||
download_url,
|
||||
htmldecode,
|
||||
dedupe,
|
||||
)
|
||||
from ..config import (
|
||||
TIMEOUT,
|
||||
|
@ -18,7 +17,6 @@ from ..config import (
|
|||
SAVE_TITLE,
|
||||
CURL_BINARY,
|
||||
CURL_ARGS,
|
||||
CURL_EXTRA_ARGS,
|
||||
CURL_VERSION,
|
||||
CURL_USER_AGENT,
|
||||
)
|
||||
|
@ -60,7 +58,6 @@ class TitleParser(HTMLParser):
|
|||
if tag.lower() == "title":
|
||||
self.inside_title_tag = False
|
||||
|
||||
|
||||
@enforce_types
|
||||
def get_html(link: Link, path: Path, timeout: int=TIMEOUT) -> str:
|
||||
"""
|
||||
|
@ -78,20 +75,13 @@ def get_html(link: Link, path: Path, timeout: int=TIMEOUT) -> str:
|
|||
with open(abs_path / source, "r", encoding="utf-8") as f:
|
||||
document = f.read()
|
||||
break
|
||||
except (FileNotFoundError, TypeError, UnicodeDecodeError):
|
||||
except (FileNotFoundError, TypeError):
|
||||
continue
|
||||
if document is None:
|
||||
return download_url(link.url, timeout=timeout)
|
||||
else:
|
||||
return document
|
||||
|
||||
|
||||
def get_output_path():
|
||||
# TODO: actually save title to this file
|
||||
# (currently only saved in ArchiveResult.output as charfield value, not saved to filesystem)
|
||||
return 'title.json'
|
||||
|
||||
|
||||
@enforce_types
|
||||
def should_save_title(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
|
||||
# if link already has valid title, skip it
|
||||
|
@ -112,17 +102,12 @@ def save_title(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -
|
|||
from core.models import Snapshot
|
||||
|
||||
output: ArchiveOutput = None
|
||||
# later options take precedence
|
||||
options = [
|
||||
cmd = [
|
||||
CURL_BINARY,
|
||||
*CURL_ARGS,
|
||||
*CURL_EXTRA_ARGS,
|
||||
'--max-time', str(timeout),
|
||||
*(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
|
||||
*([] if CHECK_SSL_VALIDITY else ['--insecure']),
|
||||
]
|
||||
cmd = [
|
||||
CURL_BINARY,
|
||||
*dedupe(options),
|
||||
link.url,
|
||||
]
|
||||
status = 'succeeded'
|
||||
|
|
|
@ -15,11 +15,9 @@ from ..util import (
|
|||
path,
|
||||
domain,
|
||||
urldecode,
|
||||
dedupe,
|
||||
)
|
||||
from ..config import (
|
||||
WGET_ARGS,
|
||||
WGET_EXTRA_ARGS,
|
||||
TIMEOUT,
|
||||
SAVE_WGET,
|
||||
SAVE_WARC,
|
||||
|
@ -35,18 +33,6 @@ from ..config import (
|
|||
from ..logging_util import TimedProgress
|
||||
|
||||
|
||||
def get_output_path():
|
||||
# TODO: actually save output into this folder, instead of do {domain}/**/index.html
|
||||
return 'wget/'
|
||||
|
||||
def get_embed_path(archiveresult=None):
|
||||
if not archiveresult:
|
||||
return get_output_path()
|
||||
|
||||
link = archiveresult.snapshot.as_link()
|
||||
return wget_output_path(link)
|
||||
|
||||
|
||||
@enforce_types
|
||||
def should_save_wget(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
|
||||
output_path = wget_output_path(link)
|
||||
|
@ -69,10 +55,10 @@ def save_wget(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
|
|||
|
||||
# WGET CLI Docs: https://www.gnu.org/software/wget/manual/wget.html
|
||||
output: ArchiveOutput = None
|
||||
# later options take precedence
|
||||
options = [
|
||||
cmd = [
|
||||
WGET_BINARY,
|
||||
# '--server-response', # print headers for better error parsing
|
||||
*WGET_ARGS,
|
||||
*WGET_EXTRA_ARGS,
|
||||
'--timeout={}'.format(timeout),
|
||||
*(['--restrict-file-names={}'.format(RESTRICT_FILE_NAMES)] if RESTRICT_FILE_NAMES else []),
|
||||
*(['--warc-file={}'.format(str(warc_path))] if SAVE_WARC else []),
|
||||
|
@ -82,11 +68,6 @@ def save_wget(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
|
|||
*(['--compression=auto'] if WGET_AUTO_COMPRESSION else []),
|
||||
*([] if SAVE_WARC else ['--timestamping']),
|
||||
*([] if CHECK_SSL_VALIDITY else ['--no-check-certificate', '--no-hsts']),
|
||||
# '--server-response', # print headers for better error parsing
|
||||
]
|
||||
cmd = [
|
||||
WGET_BINARY,
|
||||
*dedupe(options),
|
||||
link.url,
|
||||
]
|
||||
|
||||
|
@ -145,38 +126,64 @@ def save_wget(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
|
|||
|
||||
|
||||
@enforce_types
|
||||
def unsafe_wget_output_path(link: Link) -> Optional[str]:
|
||||
# There used to be a bunch of complex reverse-engineering path mapping logic here,
|
||||
# but it was removed in favor of just walking through the output folder recursively to try to find the
|
||||
# html file that wget produced. It's *much much much* slower than deriving it statically, and is currently
|
||||
# one of the main bottlenecks of ArchiveBox's performance (the output data is often on a slow HDD or network mount).
|
||||
# But it's STILL better than trying to figure out URL -> html filepath mappings ourselves from first principles.
|
||||
def wget_output_path(link: Link) -> Optional[str]:
|
||||
"""calculate the path to the wgetted .html file, since wget may
|
||||
adjust some paths to be different than the base_url path.
|
||||
|
||||
See docs on wget --adjust-extension (-E)
|
||||
"""
|
||||
|
||||
# Wget downloads can save in a number of different ways depending on the url:
|
||||
# https://example.com
|
||||
# > example.com/index.html
|
||||
# https://example.com?v=zzVa_tX1OiI
|
||||
# > example.com/index.html?v=zzVa_tX1OiI.html
|
||||
# https://www.example.com/?v=zzVa_tX1OiI
|
||||
# > example.com/index.html?v=zzVa_tX1OiI.html
|
||||
|
||||
# https://example.com/abc
|
||||
# > example.com/abc.html
|
||||
# https://example.com/abc/
|
||||
# > example.com/abc/index.html
|
||||
# https://example.com/abc?v=zzVa_tX1OiI.html
|
||||
# > example.com/abc?v=zzVa_tX1OiI.html
|
||||
# https://example.com/abc/?v=zzVa_tX1OiI.html
|
||||
# > example.com/abc/index.html?v=zzVa_tX1OiI.html
|
||||
|
||||
# https://example.com/abc/test.html
|
||||
# > example.com/abc/test.html
|
||||
# https://example.com/abc/test?v=zzVa_tX1OiI
|
||||
# > example.com/abc/test?v=zzVa_tX1OiI.html
|
||||
# https://example.com/abc/test/?v=zzVa_tX1OiI
|
||||
# > example.com/abc/test/index.html?v=zzVa_tX1OiI.html
|
||||
|
||||
# There's also lots of complexity around how the urlencoding and renaming
|
||||
# is done for pages with query and hash fragments or extensions like shtml / htm / php / etc
|
||||
|
||||
# Since the wget algorithm for -E (appending .html) is incredibly complex
|
||||
# and there's no way to get the computed output path from wget
|
||||
# in order to avoid having to reverse-engineer how they calculate it,
|
||||
# we just look in the output folder read the filename wget used from the filesystem
|
||||
full_path = without_fragment(without_query(path(link.url))).strip('/')
|
||||
search_dir = Path(link.link_dir) / domain(link.url).replace(":", "+") / urldecode(full_path)
|
||||
for _ in range(4):
|
||||
try:
|
||||
if search_dir.exists():
|
||||
if search_dir.is_dir():
|
||||
html_files = [
|
||||
f for f in search_dir.iterdir()
|
||||
if re.search(".+\\.[Ss]?[Hh][Tt][Mm][Ll]?$", str(f), re.I | re.M)
|
||||
]
|
||||
if html_files:
|
||||
return str(html_files[0].relative_to(link.link_dir))
|
||||
if search_dir.exists():
|
||||
if search_dir.is_dir():
|
||||
html_files = [
|
||||
f for f in search_dir.iterdir()
|
||||
if re.search(".+\\.[Ss]?[Hh][Tt][Mm][Ll]?$", str(f), re.I | re.M)
|
||||
]
|
||||
if html_files:
|
||||
return str(html_files[0].relative_to(link.link_dir))
|
||||
|
||||
# sometimes wget'd URLs have no ext and return non-html
|
||||
# e.g. /some/example/rss/all -> some RSS XML content)
|
||||
# /some/other/url.o4g -> some binary unrecognized ext)
|
||||
# test this with archivebox add --depth=1 https://getpocket.com/users/nikisweeting/feed/all
|
||||
last_part_of_url = urldecode(full_path.rsplit('/', 1)[-1])
|
||||
for file_present in search_dir.iterdir():
|
||||
if file_present == last_part_of_url:
|
||||
return str((search_dir / file_present).relative_to(link.link_dir))
|
||||
except OSError:
|
||||
# OSError 36 and others can happen here, caused by trying to check for impossible paths
|
||||
# (paths derived from URLs can often contain illegal unicode characters or be too long,
|
||||
# causing the OS / filesystem to reject trying to open them with a system-level error)
|
||||
pass
|
||||
# sometimes wget'd URLs have no ext and return non-html
|
||||
# e.g. /some/example/rss/all -> some RSS XML content)
|
||||
# /some/other/url.o4g -> some binary unrecognized ext)
|
||||
# test this with archivebox add --depth=1 https://getpocket.com/users/nikisweeting/feed/all
|
||||
last_part_of_url = urldecode(full_path.rsplit('/', 1)[-1])
|
||||
for file_present in search_dir.iterdir():
|
||||
if file_present == last_part_of_url:
|
||||
return str((search_dir / file_present).relative_to(link.link_dir))
|
||||
|
||||
# Move up one directory level
|
||||
search_dir = search_dir.parent
|
||||
|
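A minimal standalone sketch of the "just read what wget wrote" strategy described in the comments above. This is illustrative only, not the ArchiveBox implementation; find_wgetted_html, out_dir, and the 4-level climb are assumptions made for the example.

# Hedged sketch of the filesystem-lookup approach described above (not ArchiveBox's actual code).
# Assumes wget has already written its output somewhere under out_dir.
import re
from pathlib import Path
from typing import Optional
from urllib.parse import urlsplit, unquote

HTML_FILE_RE = re.compile(r'.+\.[sS]?[hH][tT][mM][lL]?$')

def find_wgetted_html(out_dir: str, url: str) -> Optional[str]:
    """Walk up from the URL-derived folder until an .html file written by wget is found."""
    parts = urlsplit(url)
    domain_dir = parts.netloc.replace(':', '+')      # example.com:8080 -> example.com+8080
    rel_path = unquote(parts.path).strip('/')        # drop query/fragment, keep only the path
    search_dir = Path(out_dir) / domain_dir
    if rel_path:
        search_dir = search_dir / rel_path

    for _ in range(4):                               # climb at most a few levels, like the code above
        if search_dir.is_dir():
            for candidate in sorted(search_dir.iterdir()):
                if HTML_FILE_RE.match(candidate.name):
                    return str(candidate.relative_to(out_dir))
        search_dir = search_dir.parent
    return None

# e.g. find_wgetted_html('./archive/1714000000', 'https://example.com/abc/')
# might return 'example.com/abc/index.html' if that is what wget saved.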
@ -186,101 +193,13 @@ def unsafe_wget_output_path(link: Link) -> Optional[str]:
|
|||
|
||||
# check for literally any file present that isnt an empty folder
|
||||
domain_dir = Path(domain(link.url).replace(":", "+"))
|
||||
files_within = [path for path in (Path(link.link_dir) / domain_dir).glob('**/*.*') if not str(path).endswith('.orig')]
|
||||
files_within = list((Path(link.link_dir) / domain_dir).glob('**/*.*'))
|
||||
if files_within:
|
||||
return str((domain_dir / files_within[-1]).relative_to(link.link_dir))
|
||||
|
||||
# abandon all hope, wget either never downloaded, or it produced an output path so horribly mutilated
|
||||
# that it's better we just pretend it doesnt exist
|
||||
# this is why ArchiveBox specializes in REDUNDANTLY saving copies of sites with multiple different tools
|
||||
return None
|
||||
|
||||
|
||||
@enforce_types
|
||||
def wget_output_path(link: Link) -> Optional[str]:
|
||||
"""calculate the path to the wgetted .html file, since wget may
|
||||
adjust some paths to be different than the base_url path.
|
||||
|
||||
See docs on: wget --adjust-extension (-E), --restrict-file-names=windows|unix|ascii, --convert-links
|
||||
|
||||
WARNING: this function is extremely error prone because mapping URLs to filesystem paths deterministically
|
||||
is basically impossible. Every OS and filesystem have different requirements on what special characters are
|
||||
allowed, and URLs are *full* of all kinds of special characters, illegal unicode, and generally unsafe strings
|
||||
that you dont want anywhere near your filesystem. Also URLs can be obscenely long, but most filesystems dont
|
||||
accept paths longer than 250 characters. On top of all that, this function only exists to try to reverse engineer
|
||||
wget's approach to solving this problem, so this is a shittier, less tested version of their already insanely
|
||||
complicated attempt to do this. Here be dragons:
|
||||
- https://github.com/ArchiveBox/ArchiveBox/issues/549
|
||||
- https://github.com/ArchiveBox/ArchiveBox/issues/1373
|
||||
- https://stackoverflow.com/questions/9532499/check-whether-a-path-is-valid-in-python-without-creating-a-file-at-the-paths-ta
|
||||
- and probably many more that I didn't realize were caused by this...
|
||||
|
||||
The only constructive thing we could possibly do to this function is to figure out how to remove it.
|
||||
|
||||
Preach loudly to anyone who will listen: never attempt to map URLs to filesystem paths,
|
||||
and pray you never have to deal with the aftermath of someone else's attempt to do so...
|
||||
"""
|
||||
|
||||
# Wget downloads can save in a number of different ways depending on the url:
|
||||
# https://example.com
|
||||
# > example.com/index.html
|
||||
# https://example.com?v=zzVa_tX1OiI
|
||||
# > example.com/index.html@v=zzVa_tX1OiI.html
|
||||
# https://www.example.com/?v=zzVa_tX1OiI
|
||||
# > example.com/index.html@v=zzVa_tX1OiI.html
|
||||
|
||||
# https://example.com/abc
|
||||
# > example.com/abc.html
|
||||
# https://example.com/abc/
|
||||
# > example.com/abc/index.html
|
||||
# https://example.com/abc?v=zzVa_tX1OiI.html
|
||||
# > example.com/abc@v=zzVa_tX1OiI.html
|
||||
# https://example.com/abc/?v=zzVa_tX1OiI.html
|
||||
# > example.com/abc/index.html@v=zzVa_tX1OiI.html
|
||||
|
||||
# https://example.com/abc/test.html
|
||||
# > example.com/abc/test.html
|
||||
# https://example.com/abc/test?v=zzVa_tX1OiI
|
||||
# > example.com/abc/test@v=zzVa_tX1OiI.html
|
||||
# https://example.com/abc/test/?v=zzVa_tX1OiI
|
||||
# > example.com/abc/test/index.html@v=zzVa_tX1OiI.html
|
||||
|
||||
# There's also lots of complexity around how the urlencoding and renaming
|
||||
# is done for pages with query and hash fragments, extensions like shtml / htm / php / etc,
|
||||
# unicode escape sequences, punycode domain names, unicode double-width characters, extensions longer than
|
||||
# 4 characters, paths with multiple extensions, etc. the list goes on...
|
||||
|
||||
output_path = None
|
||||
try:
|
||||
output_path = unsafe_wget_output_path(link)
|
||||
except Exception as err:
|
||||
pass # better to pretend it just failed to download than expose gnarly OSErrors to users
|
||||
|
||||
# check for unprintable unicode characters
|
||||
# https://github.com/ArchiveBox/ArchiveBox/issues/1373
|
||||
if output_path:
|
||||
safe_path = output_path.encode('utf-8', 'replace').decode()
|
||||
if output_path != safe_path:
|
||||
# contains unprintable unicode characters that will break other parts of archivebox
|
||||
# better to pretend it doesnt exist and fallback to parent dir than crash archivebox
|
||||
output_path = None
|
||||
|
||||
# check for a path that is just too long to safely handle across different OS's
|
||||
# https://github.com/ArchiveBox/ArchiveBox/issues/549
|
||||
if output_path and len(output_path) > 250:
|
||||
output_path = None
|
||||
|
||||
if output_path:
|
||||
return output_path
|
||||
|
||||
# fallback to just the domain dir
|
||||
search_dir = Path(link.link_dir) / domain(link.url).replace(":", "+")
|
||||
if search_dir.is_dir():
|
||||
return domain(link.url).replace(":", "+")
|
||||
|
||||
# fallback to just the domain dir without port
|
||||
search_dir = Path(link.link_dir) / domain(link.url).split(":", 1)[0]
|
||||
if search_dir.is_dir():
|
||||
return domain(link.url).split(":", 1)[0]
|
||||
|
||||
return None
|
||||
|
|
|
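The two guards above (rejecting paths with unencodable characters and paths longer than 250 characters) can be shown in isolation. A hedged sketch: is_safe_output_path is a hypothetical name, and 250 is simply the limit the code above uses.

# Illustration only: the same two sanity checks applied to a candidate output path.
def is_safe_output_path(candidate: str) -> bool:
    # reject paths whose characters don't survive a round-trip through utf-8
    if candidate.encode('utf-8', 'replace').decode() != candidate:
        return False
    # reject paths too long to handle safely across different OSes/filesystems
    if len(candidate) > 250:
        return False
    return True

assert is_safe_output_path('example.com/index.html')
assert not is_safe_output_path('example.com/' + 'a' * 300 + '.html')    # too long
assert not is_safe_output_path('example.com/\ud800.html')               # lone surrogate, unencodable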
@ -250,7 +250,7 @@ def load_main_index(out_dir: Path=OUTPUT_DIR, warn: bool=True) -> List[Link]:
|
|||
"""parse and load existing index with any new links from import_path merged in"""
|
||||
from core.models import Snapshot
|
||||
try:
|
||||
return Snapshot.objects.all().only('id')
|
||||
return Snapshot.objects.all()
|
||||
|
||||
except (KeyboardInterrupt, SystemExit):
|
||||
raise SystemExit(0)
|
||||
|
@ -407,7 +407,7 @@ def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type
|
|||
|
||||
def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||
"""indexed links without checking archive status or data directory validity"""
|
||||
links = (snapshot.as_link() for snapshot in snapshots.iterator())
|
||||
links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
|
||||
return {
|
||||
link.link_dir: link
|
||||
for link in links
|
||||
|
@ -415,7 +415,7 @@ def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Option
|
|||
|
||||
def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||
"""indexed links that are archived with a valid data directory"""
|
||||
links = (snapshot.as_link() for snapshot in snapshots.iterator())
|
||||
links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
|
||||
return {
|
||||
link.link_dir: link
|
||||
for link in filter(is_archived, links)
|
||||
|
@ -423,7 +423,7 @@ def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optio
|
|||
|
||||
def get_unarchived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||
"""indexed links that are unarchived with no data directory or an empty data directory"""
|
||||
links = (snapshot.as_link() for snapshot in snapshots.iterator())
|
||||
links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
|
||||
return {
|
||||
link.link_dir: link
|
||||
for link in filter(is_unarchived, links)
|
||||
|
|
|
@ -118,10 +118,10 @@ def render_django_template(template: str, context: Mapping[str, str]) -> str:
|
|||
|
||||
|
||||
def snapshot_icons(snapshot) -> str:
|
||||
cache_key = f'{snapshot.pk}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
|
||||
cache_key = f'{snapshot.id}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
|
||||
|
||||
def calc_snapshot_icons():
|
||||
from core.models import EXTRACTOR_CHOICES
|
||||
from core.models import EXTRACTORS
|
||||
# start = datetime.now(timezone.utc)
|
||||
|
||||
archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False)
|
||||
|
@ -147,12 +147,12 @@ def snapshot_icons(snapshot) -> str:
|
|||
# Missing specific entry for WARC
|
||||
|
||||
extractor_outputs = defaultdict(lambda: None)
|
||||
for extractor, _ in EXTRACTOR_CHOICES:
|
||||
for extractor, _ in EXTRACTORS:
|
||||
for result in archive_results:
|
||||
if result.extractor == extractor and result:
|
||||
extractor_outputs[extractor] = result
|
||||
|
||||
for extractor, _ in EXTRACTOR_CHOICES:
|
||||
for extractor, _ in EXTRACTORS:
|
||||
if extractor not in exclude:
|
||||
existing = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
|
||||
# Check filesystem to see if anything is actually present (too slow, needs optimization/caching)
|
||||
|
|
|
@ -4,7 +4,6 @@ WARNING: THIS FILE IS ALL LEGACY CODE TO BE REMOVED.
|
|||
|
||||
DO NOT ADD ANY NEW FEATURES TO THIS FILE, NEW CODE GOES HERE: core/models.py
|
||||
|
||||
These are the old types we used to use before ArchiveBox v0.4 (before we switched to Django).
|
||||
"""
|
||||
|
||||
__package__ = 'archivebox.index'
|
||||
|
@ -192,9 +191,6 @@ class Link:
|
|||
if extended:
|
||||
info.update({
|
||||
'snapshot_id': self.snapshot_id,
|
||||
'snapshot_uuid': self.snapshot_uuid,
|
||||
'snapshot_abid': self.snapshot_abid,
|
||||
|
||||
'link_dir': self.link_dir,
|
||||
'archive_path': self.archive_path,
|
||||
|
||||
|
@ -263,22 +259,10 @@ class Link:
|
|||
|
||||
return to_csv(self, cols=cols or self.field_names(), separator=separator, ljust=ljust)
|
||||
|
||||
@cached_property
|
||||
def snapshot(self):
|
||||
from core.models import Snapshot
|
||||
return Snapshot.objects.only('uuid').get(url=self.url)
|
||||
|
||||
@cached_property
|
||||
def snapshot_id(self):
|
||||
return str(self.snapshot.pk)
|
||||
|
||||
@cached_property
|
||||
def snapshot_uuid(self):
|
||||
return str(self.snapshot.uuid)
|
||||
|
||||
@cached_property
|
||||
def snapshot_abid(self):
|
||||
return str(self.snapshot.ABID)
|
||||
from core.models import Snapshot
|
||||
return str(Snapshot.objects.only('id').get(url=self.url).id)
|
||||
|
||||
@classmethod
|
||||
def field_names(cls):
|
||||
|
@ -395,15 +379,11 @@ class Link:
|
|||
|
||||
output_paths = (
|
||||
domain(self.url),
|
||||
'output.html',
|
||||
'output.pdf',
|
||||
'screenshot.png',
|
||||
'singlefile.html',
|
||||
'readability/content.html',
|
||||
'mercury/content.html',
|
||||
'htmltotext.txt',
|
||||
'output.html',
|
||||
'media',
|
||||
'git',
|
||||
'singlefile.html'
|
||||
)
|
||||
|
||||
return any(
|
||||
|
|
|
@ -45,8 +45,7 @@ def write_link_to_sql_index(link: Link):
|
|||
info.pop('tags')
|
||||
|
||||
try:
|
||||
snapshot = Snapshot.objects.get(url=link.url)
|
||||
info["timestamp"] = snapshot.timestamp
|
||||
info["timestamp"] = Snapshot.objects.get(url=link.url).timestamp
|
||||
except Snapshot.DoesNotExist:
|
||||
while Snapshot.objects.filter(timestamp=info["timestamp"]).exists():
|
||||
info["timestamp"] = str(float(info["timestamp"]) + 1.0)
|
||||
|
@ -58,7 +57,7 @@ def write_link_to_sql_index(link: Link):
|
|||
for entry in entries:
|
||||
if isinstance(entry, dict):
|
||||
result, _ = ArchiveResult.objects.get_or_create(
|
||||
snapshot_id=snapshot.pk,
|
||||
snapshot_id=snapshot.id,
|
||||
extractor=extractor,
|
||||
start_ts=parse_date(entry['start_ts']),
|
||||
defaults={
|
||||
|
@ -72,7 +71,7 @@ def write_link_to_sql_index(link: Link):
|
|||
)
|
||||
else:
|
||||
result, _ = ArchiveResult.objects.update_or_create(
|
||||
snapshot_id=snapshot.pk,
|
||||
snapshot_id=snapshot.id,
|
||||
extractor=extractor,
|
||||
start_ts=parse_date(entry.start_ts),
|
||||
defaults={
|
||||
|
@ -143,12 +142,7 @@ def list_migrations(out_dir: Path=OUTPUT_DIR) -> List[Tuple[bool, str]]:
|
|||
def apply_migrations(out_dir: Path=OUTPUT_DIR) -> List[str]:
|
||||
from django.core.management import call_command
|
||||
null, out = StringIO(), StringIO()
|
||||
try:
|
||||
call_command("makemigrations", interactive=False, stdout=null)
|
||||
except Exception as e:
|
||||
print('[!] Failed to create some migrations. Please open an issue and copy paste this output for help: {}'.format(e))
|
||||
print()
|
||||
|
||||
call_command("makemigrations", interactive=False, stdout=null)
|
||||
call_command("migrate", interactive=False, stdout=out)
|
||||
out.seek(0)
|
||||
|
||||
|
|
|
@ -433,13 +433,11 @@ def log_archive_method_finished(result: "ArchiveResult"):
|
|||
),
|
||||
]
|
||||
|
||||
# import pudb; pudb.set_trace()
|
||||
|
||||
# Prettify error output hints string and limit to five lines
|
||||
hints = getattr(result.output, 'hints', None) or ()
|
||||
if hints:
|
||||
if isinstance(hints, (list, tuple, type(_ for _ in ()))):
|
||||
hints = [hint.decode() if isinstance(hint, bytes) else str(hint) for hint in hints]
|
||||
hints = [hint.decode() for hint in hints if isinstance(hint, bytes)]
|
||||
else:
|
||||
if isinstance(hints, bytes):
|
||||
hints = hints.decode()
|
||||
|
@ -638,15 +636,17 @@ def printable_folder_status(name: str, folder: Dict) -> str:
|
|||
|
||||
@enforce_types
|
||||
def printable_dependency_version(name: str, dependency: Dict) -> str:
|
||||
color, symbol, note, version = 'red', 'X', 'invalid', '?'
|
||||
|
||||
version = None
|
||||
if dependency['enabled']:
|
||||
if dependency['is_valid']:
|
||||
color, symbol, note = 'green', '√', 'valid'
|
||||
color, symbol, note, version = 'green', '√', 'valid', ''
|
||||
|
||||
parsed_version_num = re.search(r'[\d\.]+', dependency['version'])
|
||||
if parsed_version_num:
|
||||
version = f'v{parsed_version_num[0]}'
|
||||
|
||||
if not version:
|
||||
color, symbol, note, version = 'red', 'X', 'invalid', '?'
|
||||
else:
|
||||
color, symbol, note, version = 'lightyellow', '-', 'disabled', '-'
|
||||
|
||||
|
|
|
@ -104,6 +104,7 @@ from .config import (
|
|||
COMMIT_HASH,
|
||||
BUILD_TIME,
|
||||
CODE_LOCATIONS,
|
||||
EXTERNAL_LOCATIONS,
|
||||
DATA_LOCATIONS,
|
||||
DEPENDENCIES,
|
||||
CHROME_BINARY,
|
||||
|
@ -230,7 +231,7 @@ def version(quiet: bool=False,
|
|||
p = platform.uname()
|
||||
print(
|
||||
'ArchiveBox v{}'.format(get_version(CONFIG)),
|
||||
f'COMMIT_HASH={COMMIT_HASH[:7] if COMMIT_HASH else "unknown"}',
|
||||
*((f'COMMIT_HASH={COMMIT_HASH[:7]}',) if COMMIT_HASH else ()),
|
||||
f'BUILD_TIME={BUILD_TIME}',
|
||||
)
|
||||
print(
|
||||
|
@ -271,6 +272,11 @@ def version(quiet: bool=False,
|
|||
for name, path in CODE_LOCATIONS.items():
|
||||
print(printable_folder_status(name, path))
|
||||
|
||||
print()
|
||||
print('{white}[i] Secrets locations:{reset}'.format(**ANSI))
|
||||
for name, path in EXTERNAL_LOCATIONS.items():
|
||||
print(printable_folder_status(name, path))
|
||||
|
||||
print()
|
||||
if DATA_LOCATIONS['OUTPUT_DIR']['is_valid']:
|
||||
print('{white}[i] Data locations:{reset}'.format(**ANSI))
|
||||
|
@ -689,7 +695,7 @@ def add(urls: Union[str, List[str]],
|
|||
if CAN_UPGRADE:
|
||||
hint(f"There's a new version of ArchiveBox available! Your current version is {VERSION}. You can upgrade to {VERSIONS_AVAILABLE['recommended_version']['tag_name']} ({VERSIONS_AVAILABLE['recommended_version']['html_url']}). For more on how to upgrade: https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives\n")
|
||||
|
||||
return new_links
|
||||
return all_links
|
||||
|
||||
@enforce_types
|
||||
def remove(filter_str: Optional[str]=None,
|
||||
|
@ -785,8 +791,6 @@ def update(resume: Optional[float]=None,
|
|||
out_dir: Path=OUTPUT_DIR) -> List[Link]:
|
||||
"""Import any new links from subscriptions and retry any previously failed/skipped links"""
|
||||
|
||||
from core.models import ArchiveResult
|
||||
|
||||
check_data_folder(out_dir=out_dir)
|
||||
check_dependencies()
|
||||
new_links: List[Link] = [] # TODO: Remove input argument: only_new
|
||||
|
@ -794,23 +798,19 @@ def update(resume: Optional[float]=None,
|
|||
extractors = extractors.split(",") if extractors else []
|
||||
|
||||
# Step 1: Filter for selected_links
|
||||
print('[*] Finding matching Snapshots to update...')
|
||||
print(f' - Filtering by {" ".join(filter_patterns)} ({filter_type}) {before=} {after=} {status=}...')
|
||||
matching_snapshots = list_links(
|
||||
filter_patterns=filter_patterns,
|
||||
filter_type=filter_type,
|
||||
before=before,
|
||||
after=after,
|
||||
)
|
||||
print(f' - Checking {matching_snapshots.count()} snapshot folders for existing data with {status=}...')
|
||||
|
||||
matching_folders = list_folders(
|
||||
links=matching_snapshots,
|
||||
status=status,
|
||||
out_dir=out_dir,
|
||||
)
|
||||
all_links = (link for link in matching_folders.values() if link)
|
||||
print(' - Sorting by most unfinished -> least unfinished + date archived...')
|
||||
all_links = sorted(all_links, key=lambda link: (ArchiveResult.objects.filter(snapshot__url=link.url).count(), link.timestamp))
|
||||
all_links = [link for link in matching_folders.values() if link]
|
||||
|
||||
if index_only:
|
||||
for link in all_links:
|
||||
|
@ -836,7 +836,6 @@ def update(resume: Optional[float]=None,
|
|||
if extractors:
|
||||
archive_kwargs["methods"] = extractors
|
||||
|
||||
|
||||
archive_links(to_archive, overwrite=overwrite, **archive_kwargs)
|
||||
|
||||
# Step 4: Re-write links index with updated titles, icons, and resources
|
||||
|
@ -1356,7 +1355,7 @@ def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None:
|
|||
if (args and "createsuperuser" in args) and (IN_DOCKER and not IS_TTY):
|
||||
stderr('[!] Warning: you need to pass -it to use interactive commands in docker', color='lightyellow')
|
||||
stderr(' docker run -it archivebox manage {}'.format(' '.join(args or ['...'])), color='lightyellow')
|
||||
stderr('')
|
||||
stderr()
|
||||
|
||||
execute_from_command_line([f'{ARCHIVEBOX_BINARY} manage', *(args or ['help'])])
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ if __name__ == '__main__':
|
|||
# versions of ./manage.py commands whenever possible. When that's not possible
|
||||
# (e.g. makemigrations), you can comment out this check temporarily
|
||||
|
||||
if not ('makemigrations' in sys.argv or 'migrate' in sys.argv or 'startapp' in sys.argv):
|
||||
if not ('makemigrations' in sys.argv or 'migrate' in sys.argv):
|
||||
print("[X] Don't run ./manage.py directly (unless you are a developer running makemigrations):")
|
||||
print()
|
||||
print(' Hint: Use these archivebox CLI commands instead of the ./manage.py equivalents:')
|
||||
|
|
|
@ -1,16 +0,0 @@
|
|||
__package__ = 'archivebox'
|
||||
|
||||
import django_stubs_ext
|
||||
|
||||
django_stubs_ext.monkeypatch()
|
||||
|
||||
|
||||
# monkey patch django timezone to add back utc (it was removed in Django 5.0)
|
||||
import datetime
|
||||
from django.utils import timezone
|
||||
timezone.utc = datetime.timezone.utc
|
||||
|
||||
|
||||
# monkey patch django-signals-webhooks to change how it shows up in Admin UI
|
||||
# from signal_webhooks.apps import DjangoSignalWebhooksConfig
|
||||
# DjangoSignalWebhooksConfig.verbose_name = 'API'
|
2679 archivebox/package-lock.json (generated)
File diff suppressed because it is too large
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "archivebox",
|
||||
"version": "0.8.1",
|
||||
"version": "0.7.2",
|
||||
"description": "ArchiveBox: The self-hosted internet archive",
|
||||
"author": "Nick Sweeting <archivebox-npm@sweeting.me>",
|
||||
"repository": "github:ArchiveBox/ArchiveBox",
|
||||
|
@ -8,6 +8,6 @@
|
|||
"dependencies": {
|
||||
"@postlight/parser": "^2.2.3",
|
||||
"readability-extractor": "github:ArchiveBox/readability-extractor",
|
||||
"single-file-cli": "^1.1.54"
|
||||
"single-file-cli": "^1.1.46"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@ For examples of supported import formats see tests/.
|
|||
|
||||
__package__ = 'archivebox.parsers'
|
||||
|
||||
import re
|
||||
from io import StringIO
|
||||
|
||||
from typing import IO, Tuple, List, Optional
|
||||
|
@ -27,6 +28,7 @@ from ..util import (
|
|||
htmldecode,
|
||||
download_url,
|
||||
enforce_types,
|
||||
URL_REGEX,
|
||||
)
|
||||
from ..index.schema import Link
|
||||
from ..logging_util import TimedProgress, log_source_saved
|
||||
|
@ -42,7 +44,6 @@ from . import medium_rss
|
|||
from . import netscape_html
|
||||
from . import generic_rss
|
||||
from . import generic_json
|
||||
from . import generic_jsonl
|
||||
from . import generic_html
|
||||
from . import generic_txt
|
||||
from . import url_list
|
||||
|
@ -62,7 +63,6 @@ PARSERS = {
|
|||
netscape_html.KEY: (netscape_html.NAME, netscape_html.PARSER),
|
||||
generic_rss.KEY: (generic_rss.NAME, generic_rss.PARSER),
|
||||
generic_json.KEY: (generic_json.NAME, generic_json.PARSER),
|
||||
generic_jsonl.KEY: (generic_jsonl.NAME, generic_jsonl.PARSER),
|
||||
generic_html.KEY: (generic_html.NAME, generic_html.PARSER),
|
||||
|
||||
# Catchall fallback parser
|
||||
|
@ -200,3 +200,54 @@ def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{ba
|
|||
log_source_saved(source_file=source_path)
|
||||
|
||||
return source_path
|
||||
|
||||
|
||||
# Check that plain text regex URL parsing works as expected
|
||||
# this is last-line-of-defense to make sure the URL_REGEX isn't
|
||||
# misbehaving due to some OS-level or environment level quirks (e.g. bad regex lib)
|
||||
# the consequences of bad URL parsing could be disastrous and lead to many
|
||||
# incorrect/badly parsed links being added to the archive, so this is worth the cost of checking
|
||||
_test_url_strs = {
|
||||
'example.com': 0,
|
||||
'/example.com': 0,
|
||||
'//example.com': 0,
|
||||
':/example.com': 0,
|
||||
'://example.com': 0,
|
||||
'htt://example8.com': 0,
|
||||
'/htt://example.com': 0,
|
||||
'https://example': 1,
|
||||
'https://localhost/2345': 1,
|
||||
'https://localhost:1234/123': 1,
|
||||
'://': 0,
|
||||
'https://': 0,
|
||||
'http://': 0,
|
||||
'ftp://': 0,
|
||||
'ftp://example.com': 0,
|
||||
'https://example.com': 1,
|
||||
'https://example.com/': 1,
|
||||
'https://a.example.com': 1,
|
||||
'https://a.example.com/': 1,
|
||||
'https://a.example.com/what/is/happening.html': 1,
|
||||
'https://a.example.com/what/ís/happening.html': 1,
|
||||
'https://a.example.com/what/is/happening.html?what=1&2%20b#höw-about-this=1a': 1,
|
||||
'https://a.example.com/what/is/happéning/?what=1&2%20b#how-aboüt-this=1a': 1,
|
||||
'HTtpS://a.example.com/what/is/happening/?what=1&2%20b#how-about-this=1af&2f%20b': 1,
|
||||
'https://example.com/?what=1#how-about-this=1&2%20baf': 1,
|
||||
'https://example.com?what=1#how-about-this=1&2%20baf': 1,
|
||||
'<test>http://example7.com</test>': 1,
|
||||
'https://<test>': 0,
|
||||
'https://[test]': 0,
|
||||
'http://"test"': 0,
|
||||
'http://\'test\'': 0,
|
||||
'[https://example8.com/what/is/this.php?what=1]': 1,
|
||||
'[and http://example9.com?what=1&other=3#and-thing=2]': 1,
|
||||
'<what>https://example10.com#and-thing=2 "</about>': 1,
|
||||
'abc<this["https://example11.com/what/is#and-thing=2?whoami=23&where=1"]that>def': 1,
|
||||
'sdflkf[what](https://example12.com/who/what.php?whoami=1#whatami=2)?am=hi': 1,
|
||||
'<or>http://examplehttp://15.badc</that>': 2,
|
||||
'https://a.example.com/one.html?url=http://example.com/inside/of/another?=http://': 2,
|
||||
'[https://a.example.com/one.html?url=http://example.com/inside/of/another?=](http://a.example.com)': 3,
|
||||
}
|
||||
for url_str, num_urls in _test_url_strs.items():
|
||||
assert len(re.findall(URL_REGEX, url_str)) == num_urls, (
|
||||
f'{url_str} does not contain {num_urls} urls')
|
||||
|
|
|
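For reference, a single illustrative check of what the self-test above exercises (URL_REGEX is the pattern imported from ..util, and the expected count comes straight from the table above):

# Illustration only: one row of the expectations table above, checked by hand.
import re
matches = re.findall(URL_REGEX, '<test>http://example7.com</test>')
assert len(matches) == 1   # exactly one URL should be found inside the surrounding markup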
@ -10,7 +10,7 @@ from ..index.schema import Link
|
|||
from ..util import (
|
||||
htmldecode,
|
||||
enforce_types,
|
||||
find_all_urls,
|
||||
URL_REGEX,
|
||||
)
|
||||
from html.parser import HTMLParser
|
||||
from urllib.parse import urljoin
|
||||
|
@ -40,22 +40,10 @@ def parse_generic_html_export(html_file: IO[str], root_url: Optional[str]=None,
|
|||
parser.feed(line)
|
||||
for url in parser.urls:
|
||||
if root_url:
|
||||
url_is_absolute = (url.lower().startswith('http://') or url.lower().startswith('https://'))
|
||||
# url = https://abc.com => True
|
||||
# url = /page.php?next=https://example.com => False
|
||||
# resolve relative urls /home.html -> https://example.com/home.html
|
||||
url = urljoin(root_url, url)
|
||||
|
||||
if not url_is_absolute: # resolve it by joining it with root_url
|
||||
relative_path = url
|
||||
|
||||
url = urljoin(root_url, relative_path) # https://example.com/somepage.html + /home.html
|
||||
# => https://example.com/home.html
|
||||
|
||||
# special case to handle bug around // handling, crucial for urls that contain sub-urls
|
||||
# e.g. https://web.archive.org/web/https://example.com
|
||||
if did_urljoin_misbehave(root_url, relative_path, url):
|
||||
url = fix_urljoin_bug(url)
|
||||
|
||||
for archivable_url in find_all_urls(url):
|
||||
for archivable_url in re.findall(URL_REGEX, url):
|
||||
yield Link(
|
||||
url=htmldecode(archivable_url),
|
||||
timestamp=str(datetime.now(timezone.utc).timestamp()),
|
||||
|
@ -68,74 +56,3 @@ def parse_generic_html_export(html_file: IO[str], root_url: Optional[str]=None,
|
|||
KEY = 'html'
|
||||
NAME = 'Generic HTML'
|
||||
PARSER = parse_generic_html_export
|
||||
|
||||
|
||||
#### WORKAROUND CODE FOR https://github.com/python/cpython/issues/96015 ####
|
||||
|
||||
def did_urljoin_misbehave(root_url: str, relative_path: str, final_url: str) -> bool:
|
||||
"""
|
||||
Handle urljoin edge case bug where multiple slashes get turned into a single slash:
|
||||
- https://github.com/python/cpython/issues/96015
|
||||
- https://github.com/ArchiveBox/ArchiveBox/issues/1411
|
||||
|
||||
This workaround only fixes the most common case of a sub-URL inside an outer URL, e.g.:
|
||||
https://web.archive.org/web/https://example.com/some/inner/url
|
||||
|
||||
But there are other valid URLs containing // that are not fixed by this workaround, e.g.:
|
||||
https://example.com/drives/C//some/file
|
||||
"""
|
||||
|
||||
# if relative path is actually an absolute url, cut off its own scheme so we check the path component only
|
||||
relative_path = relative_path.lower()
|
||||
if relative_path.startswith('http://') or relative_path.startswith('https://'):
|
||||
relative_path = relative_path.split('://', 1)[-1]
|
||||
|
||||
# TODO: properly fix all double // getting stripped by urljoin, not just ://
|
||||
original_path_had_suburl = '://' in relative_path
|
||||
original_root_had_suburl = '://' in root_url[8:] # ignore first 8 chars because root always starts with https://
|
||||
final_joined_has_suburl = '://' in final_url[8:] # ignore first 8 chars because final always starts with https://
|
||||
|
||||
urljoin_broke_suburls = (
|
||||
(original_root_had_suburl or original_path_had_suburl)
|
||||
and not final_joined_has_suburl
|
||||
)
|
||||
return urljoin_broke_suburls
|
||||
|
||||
|
||||
def fix_urljoin_bug(url: str, nesting_limit=5):
|
||||
"""
|
||||
recursively replace broken suburls .../http:/... with .../http://...
|
||||
|
||||
basically equivalent to this for 99.9% of cases:
|
||||
url = url.replace('/http:/', '/http://')
|
||||
url = url.replace('/https:/', '/https://')
|
||||
except this handles:
|
||||
other schemes besides http/https (e.g. https://example.com/link/git+ssh://github.com/example)
|
||||
other preceding separators besides / (e.g. https://example.com/login/?next=https://example.com/home)
|
||||
fixing multiple suburls recursively
|
||||
"""
|
||||
input_url = url
|
||||
for _ in range(nesting_limit):
|
||||
url = re.sub(
|
||||
r'(?P<root>.+?)' # https://web.archive.org/web
|
||||
+ r'(?P<separator>[-=/_&+%$#@!*\(\\])' # /
|
||||
+ r'(?P<subscheme>[a-zA-Z0-9+_-]{1,32}?):/' # http:/
|
||||
+ r'(?P<suburl>[^/\\]+)', # example.com
|
||||
r"\1\2\3://\4",
|
||||
input_url,
|
||||
re.IGNORECASE | re.UNICODE,
|
||||
)
|
||||
if url == input_url:
|
||||
break # nothing left to replace, all suburls are fixed
|
||||
input_url = url
|
||||
|
||||
return url
|
||||
|
||||
|
||||
# sanity check to make sure workaround code works as expected and doesnt introduce *more* bugs
|
||||
assert did_urljoin_misbehave('https://web.archive.org/web/https://example.com', 'abc.html', 'https://web.archive.org/web/https:/example.com/abc.html') == True
|
||||
assert did_urljoin_misbehave('http://example.com', 'https://web.archive.org/web/http://example.com/abc.html', 'https://web.archive.org/web/http:/example.com/abc.html') == True
|
||||
assert fix_urljoin_bug('https:/example.com') == 'https:/example.com' # should not modify original url's scheme, only sub-urls
|
||||
assert fix_urljoin_bug('https://web.archive.org/web/https:/example.com/abc.html') == 'https://web.archive.org/web/https://example.com/abc.html'
|
||||
assert fix_urljoin_bug('http://example.com/link/git+ssh:/github.com/example?next=ftp:/example.com') == 'http://example.com/link/git+ssh://github.com/example?next=ftp://example.com'
|
||||
|
||||
|
|
|
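A hedged sketch of how the two helpers above are meant to be used together when resolving an href against the page URL; resolve_relative is a hypothetical wrapper name, and the repaired-URL example is taken from the asserts above.

# Usage sketch only, combining urljoin with the workaround helpers shown above.
from urllib.parse import urljoin

def resolve_relative(root_url: str, relative_path: str) -> str:
    """Join a (possibly relative) href against the page URL, then repair any
    sub-URL whose '//' was collapsed to '/' by urljoin (cpython issue 96015)."""
    joined = urljoin(root_url, relative_path)
    if did_urljoin_misbehave(root_url, relative_path, joined):
        joined = fix_urljoin_bug(joined)
    return joined

# From the asserts above, a collapsed sub-URL gets repaired like this:
#   fix_urljoin_bug('https://web.archive.org/web/https:/example.com/abc.html')
#     == 'https://web.archive.org/web/https://example.com/abc.html'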
@ -11,60 +11,6 @@ from ..util import (
|
|||
enforce_types,
|
||||
)
|
||||
|
||||
# This gets used by generic_jsonl, too
|
||||
def jsonObjectToLink(link: str, source: str):
|
||||
json_date = lambda s: datetime.strptime(s, '%Y-%m-%dT%H:%M:%S%z')
|
||||
|
||||
# example line
|
||||
# {"href":"http:\/\/www.reddit.com\/r\/example","description":"title here","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"reddit android"}]
|
||||
# Parse URL
|
||||
url = link.get('href') or link.get('url') or link.get('URL')
|
||||
if not url:
|
||||
raise Exception('JSON must contain URL in each entry [{"url": "http://...", ...}, ...]')
|
||||
|
||||
# Parse the timestamp
|
||||
ts_str = str(datetime.now(timezone.utc).timestamp())
|
||||
if link.get('timestamp'):
|
||||
# chrome/ff histories use a very precise timestamp
|
||||
ts_str = str(link['timestamp'] / 10000000)
|
||||
elif link.get('time'):
|
||||
ts_str = str(json_date(link['time'].split(',', 1)[0]).timestamp())
|
||||
elif link.get('created_at'):
|
||||
ts_str = str(json_date(link['created_at']).timestamp())
|
||||
elif link.get('created'):
|
||||
ts_str = str(json_date(link['created']).timestamp())
|
||||
elif link.get('date'):
|
||||
ts_str = str(json_date(link['date']).timestamp())
|
||||
elif link.get('bookmarked'):
|
||||
ts_str = str(json_date(link['bookmarked']).timestamp())
|
||||
elif link.get('saved'):
|
||||
ts_str = str(json_date(link['saved']).timestamp())
|
||||
|
||||
# Parse the title
|
||||
title = None
|
||||
if link.get('title'):
|
||||
title = link['title'].strip()
|
||||
elif link.get('description'):
|
||||
title = link['description'].replace(' — Readability', '').strip()
|
||||
elif link.get('name'):
|
||||
title = link['name'].strip()
|
||||
|
||||
# if we have a list, join it with commas
|
||||
tags = link.get('tags')
|
||||
if type(tags) == list:
|
||||
tags = ','.join(tags)
|
||||
elif type(tags) == str:
|
||||
# if there's no comma, assume it was space-separated
|
||||
if ',' not in tags:
|
||||
tags = tags.replace(' ', ',')
|
||||
|
||||
return Link(
|
||||
url=htmldecode(url),
|
||||
timestamp=ts_str,
|
||||
title=htmldecode(title) or None,
|
||||
tags=htmldecode(tags),
|
||||
sources=[source],
|
||||
)
|
||||
|
||||
@enforce_types
|
||||
def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]:
|
||||
|
@ -72,13 +18,55 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]:
|
|||
|
||||
json_file.seek(0)
|
||||
|
||||
links = json.load(json_file)
|
||||
if type(links) != list:
|
||||
raise Exception('JSON parser expects list of objects, maybe this is JSONL?')
|
||||
# sometimes the first line is a comment or filepath, so we get everything after the first {
|
||||
json_file_json_str = '{' + json_file.read().split('{', 1)[-1]
|
||||
links = json.loads(json_file_json_str)
|
||||
json_date = lambda s: datetime.strptime(s, '%Y-%m-%dT%H:%M:%S%z')
|
||||
|
||||
for link in links:
|
||||
# example line
|
||||
# {"href":"http:\/\/www.reddit.com\/r\/example","description":"title here","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"reddit android"}]
|
||||
if link:
|
||||
yield jsonObjectToLink(link, json_file.name)
|
||||
# Parse URL
|
||||
url = link.get('href') or link.get('url') or link.get('URL')
|
||||
if not url:
|
||||
raise Exception('JSON must contain URL in each entry [{"url": "http://...", ...}, ...]')
|
||||
|
||||
# Parse the timestamp
|
||||
ts_str = str(datetime.now(timezone.utc).timestamp())
|
||||
if link.get('timestamp'):
|
||||
# chrome/ff histories use a very precise timestamp
|
||||
ts_str = str(link['timestamp'] / 10000000)
|
||||
elif link.get('time'):
|
||||
ts_str = str(json_date(link['time'].split(',', 1)[0]).timestamp())
|
||||
elif link.get('created_at'):
|
||||
ts_str = str(json_date(link['created_at']).timestamp())
|
||||
elif link.get('created'):
|
||||
ts_str = str(json_date(link['created']).timestamp())
|
||||
elif link.get('date'):
|
||||
ts_str = str(json_date(link['date']).timestamp())
|
||||
elif link.get('bookmarked'):
|
||||
ts_str = str(json_date(link['bookmarked']).timestamp())
|
||||
elif link.get('saved'):
|
||||
ts_str = str(json_date(link['saved']).timestamp())
|
||||
|
||||
# Parse the title
|
||||
title = None
|
||||
if link.get('title'):
|
||||
title = link['title'].strip()
|
||||
elif link.get('description'):
|
||||
title = link['description'].replace(' — Readability', '').strip()
|
||||
elif link.get('name'):
|
||||
title = link['name'].strip()
|
||||
|
||||
yield Link(
|
||||
url=htmldecode(url),
|
||||
timestamp=ts_str,
|
||||
title=htmldecode(title) or None,
|
||||
tags=htmldecode(link.get('tags')) or '',
|
||||
sources=[json_file.name],
|
||||
)
|
||||
|
||||
|
||||
KEY = 'json'
|
||||
NAME = 'Generic JSON'
|
||||
|
|
|
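A small illustrative call of the dev-branch jsonObjectToLink helper shown above, using the example bookmark line from its comment (field values and expected results are read off the code above, not tested output):

# Illustration only: parse the example Pinboard-style entry from the comment above.
entry = {
    "href": "http://www.reddit.com/r/example",
    "description": "title here",
    "time": "2014-06-14T15:51:42Z",
    "tags": "reddit android",
}
link = jsonObjectToLink(entry, source='bookmarks.json')
# link.url   == 'http://www.reddit.com/r/example'
# link.title == 'title here'
# link.tags  == 'reddit,android'   # space-separated tags are rewritten as comma-separated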
@ -1,32 +0,0 @@
|
|||
__package__ = 'archivebox.parsers'
|
||||
|
||||
import json
|
||||
|
||||
from typing import IO, Iterable
|
||||
|
||||
from ..index.schema import Link
|
||||
from ..util import (
|
||||
enforce_types,
|
||||
)
|
||||
|
||||
from .generic_json import jsonObjectToLink
|
||||
|
||||
def parse_line(line: str):
|
||||
if line.strip() != "":
|
||||
return json.loads(line)
|
||||
|
||||
@enforce_types
|
||||
def parse_generic_jsonl_export(json_file: IO[str], **_kwargs) -> Iterable[Link]:
|
||||
"""Parse JSONL format bookmarks export files"""
|
||||
|
||||
json_file.seek(0)
|
||||
|
||||
links = [ parse_line(line) for line in json_file ]
|
||||
|
||||
for link in links:
|
||||
if link:
|
||||
yield jsonObjectToLink(link,json_file.name)
|
||||
|
||||
KEY = 'jsonl'
|
||||
NAME = 'Generic JSONL'
|
||||
PARSER = parse_generic_jsonl_export
|
|
@ -2,13 +2,13 @@ __package__ = 'archivebox.parsers'
|
|||
|
||||
|
||||
from typing import IO, Iterable
|
||||
from time import mktime
|
||||
from feedparser import parse as feedparser
|
||||
from datetime import datetime
|
||||
|
||||
from ..index.schema import Link
|
||||
from ..util import (
|
||||
htmldecode,
|
||||
enforce_types
|
||||
enforce_types,
|
||||
str_between,
|
||||
)
|
||||
|
||||
@enforce_types
|
||||
|
@ -16,27 +16,35 @@ def parse_generic_rss_export(rss_file: IO[str], **_kwargs) -> Iterable[Link]:
|
|||
"""Parse RSS XML-format files into links"""
|
||||
|
||||
rss_file.seek(0)
|
||||
feed = feedparser(rss_file.read())
|
||||
for item in feed.entries:
|
||||
url = item.link
|
||||
title = item.title
|
||||
time = mktime(item.updated_parsed)
|
||||
items = rss_file.read().split('<item>')
|
||||
items = items[1:] if items else []
|
||||
for item in items:
|
||||
# example item:
|
||||
# <item>
|
||||
# <title><![CDATA[How JavaScript works: inside the V8 engine]]></title>
|
||||
# <category>Unread</category>
|
||||
# <link>https://blog.sessionstack.com/how-javascript-works-inside</link>
|
||||
# <guid>https://blog.sessionstack.com/how-javascript-works-inside</guid>
|
||||
# <pubDate>Mon, 21 Aug 2017 14:21:58 -0500</pubDate>
|
||||
# </item>
|
||||
|
||||
try:
|
||||
tags = ','.join(map(lambda tag: tag.term, item.tags))
|
||||
except AttributeError:
|
||||
tags = ''
|
||||
trailing_removed = item.split('</item>', 1)[0]
|
||||
leading_removed = trailing_removed.split('<item>', 1)[-1].strip()
|
||||
rows = leading_removed.split('\n')
|
||||
|
||||
if url is None:
|
||||
# Yielding a Link with no URL will
|
||||
# crash on a URL validation assertion
|
||||
continue
|
||||
def get_row(key):
|
||||
return [r for r in rows if r.strip().startswith('<{}>'.format(key))][0]
|
||||
|
||||
url = str_between(get_row('link'), '<link>', '</link>')
|
||||
ts_str = str_between(get_row('pubDate'), '<pubDate>', '</pubDate>')
|
||||
time = datetime.strptime(ts_str, "%a, %d %b %Y %H:%M:%S %z")
|
||||
title = str_between(get_row('title'), '<![CDATA[', ']]').strip()
|
||||
|
||||
yield Link(
|
||||
url=htmldecode(url),
|
||||
timestamp=str(time),
|
||||
timestamp=str(time.timestamp()),
|
||||
title=htmldecode(title) or None,
|
||||
tags=tags,
|
||||
tags=None,
|
||||
sources=[rss_file.name],
|
||||
)
|
||||
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
__package__ = 'archivebox.parsers'
|
||||
__description__ = 'Plain Text'
|
||||
|
||||
import re
|
||||
|
||||
from typing import IO, Iterable
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
@ -9,7 +11,7 @@ from ..index.schema import Link
|
|||
from ..util import (
|
||||
htmldecode,
|
||||
enforce_types,
|
||||
find_all_urls,
|
||||
URL_REGEX
|
||||
)
|
||||
|
||||
|
||||
|
@ -37,7 +39,7 @@ def parse_generic_txt_export(text_file: IO[str], **_kwargs) -> Iterable[Link]:
|
|||
pass
|
||||
|
||||
# otherwise look for anything that looks like a URL in the line
|
||||
for url in find_all_urls(line):
|
||||
for url in re.findall(URL_REGEX, line):
|
||||
yield Link(
|
||||
url=htmldecode(url),
|
||||
timestamp=str(datetime.now(timezone.utc).timestamp()),
|
||||
|
@ -46,6 +48,17 @@ def parse_generic_txt_export(text_file: IO[str], **_kwargs) -> Iterable[Link]:
|
|||
sources=[text_file.name],
|
||||
)
|
||||
|
||||
# look inside the URL for any sub-urls, e.g. for archive.org links
|
||||
# https://web.archive.org/web/20200531203453/https://www.reddit.com/r/socialism/comments/gu24ke/nypd_officers_claim_they_are_protecting_the_rule/fsfq0sw/
|
||||
# -> https://www.reddit.com/r/socialism/comments/gu24ke/nypd_officers_claim_they_are_protecting_the_rule/fsfq0sw/
|
||||
for sub_url in re.findall(URL_REGEX, line[1:]):
|
||||
yield Link(
|
||||
url=htmldecode(sub_url),
|
||||
timestamp=str(datetime.now(timezone.utc).timestamp()),
|
||||
title=None,
|
||||
tags=None,
|
||||
sources=[text_file.name],
|
||||
)
|
||||
|
||||
KEY = 'txt'
|
||||
NAME = 'Generic TXT'
|
||||
|
|
|
@ -2,41 +2,50 @@ __package__ = 'archivebox.parsers'
|
|||
|
||||
|
||||
from typing import IO, Iterable
|
||||
from time import mktime
|
||||
from feedparser import parse as feedparser
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from xml.etree import ElementTree
|
||||
|
||||
from ..index.schema import Link
|
||||
from ..util import (
|
||||
htmldecode,
|
||||
enforce_types
|
||||
enforce_types,
|
||||
)
|
||||
|
||||
|
||||
@enforce_types
|
||||
def parse_pinboard_rss_export(rss_file: IO[str], **_kwargs) -> Iterable[Link]:
|
||||
"""Parse Pinboard RSS feed files into links"""
|
||||
|
||||
rss_file.seek(0)
|
||||
feed = feedparser(rss_file.read())
|
||||
for item in feed.entries:
|
||||
url = item.link
|
||||
# title will start with "[priv] " if pin was marked private. useful?
|
||||
title = item.title
|
||||
time = mktime(item.updated_parsed)
|
||||
root = ElementTree.parse(rss_file).getroot()
|
||||
items = root.findall("{http://purl.org/rss/1.0/}item")
|
||||
for item in items:
|
||||
find = lambda p: item.find(p).text.strip() if item.find(p) is not None else None # type: ignore
|
||||
|
||||
# all tags are in one entry.tags with spaces in it. annoying!
|
||||
try:
|
||||
tags = item.tags[0].term.replace(' ', ',')
|
||||
except AttributeError:
|
||||
tags = ''
|
||||
url = find("{http://purl.org/rss/1.0/}link")
|
||||
tags = find("{http://purl.org/dc/elements/1.1/}subject")
|
||||
title = find("{http://purl.org/rss/1.0/}title")
|
||||
ts_str = find("{http://purl.org/dc/elements/1.1/}date")
|
||||
|
||||
if url is None:
|
||||
# Yielding a Link with no URL will
|
||||
# crash on a URL validation assertion
|
||||
continue
|
||||
|
||||
# Pinboard includes a colon in its date stamp timezone offsets, which
|
||||
# Python can't parse. Remove it:
|
||||
if ts_str and ts_str[-3:-2] == ":":
|
||||
ts_str = ts_str[:-3]+ts_str[-2:]
|
||||
|
||||
if ts_str:
|
||||
time = datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%S%z")
|
||||
else:
|
||||
time = datetime.now(timezone.utc)
|
||||
|
||||
yield Link(
|
||||
url=htmldecode(url),
|
||||
timestamp=str(time),
|
||||
timestamp=str(time.timestamp()),
|
||||
title=htmldecode(title) or None,
|
||||
tags=htmldecode(tags) or None,
|
||||
sources=[rss_file.name],
|
||||
|
|
|
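The timezone-offset normalization above in isolation, on a hypothetical Pinboard-style date string (the value is made up for illustration):

# Standalone sketch of the colon-stripping shown above.
from datetime import datetime

ts_str = '2014-06-14T15:51:42-07:00'        # hypothetical Pinboard <dc:date> value
if ts_str and ts_str[-3:-2] == ':':
    ts_str = ts_str[:-3] + ts_str[-2:]      # '-07:00' -> '-0700'
parsed = datetime.strptime(ts_str, '%Y-%m-%dT%H:%M:%S%z')
print(parsed.isoformat())                   # 2014-06-14T15:51:42-07:00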
@ -1,17 +0,0 @@
|
|||
__package__ = 'archivebox.plugantic'
|
||||
|
||||
from .binproviders import BinProvider
|
||||
from .binaries import Binary
|
||||
from .extractors import Extractor
|
||||
from .replayers import Replayer
|
||||
from .configs import ConfigSet
|
||||
from .plugins import Plugin
|
||||
|
||||
# __all__ = [
|
||||
# 'BinProvider',
|
||||
# 'Binary',
|
||||
# 'Extractor',
|
||||
# 'Replayer',
|
||||
# 'ConfigSet',
|
||||
# 'Plugin',
|
||||
# ]
|
|
@ -1,26 +0,0 @@
|
|||
# from django.contrib import admin
|
||||
# from django import forms
|
||||
|
||||
# from django_jsonform.widgets import JSONFormWidget
|
||||
|
||||
# from django_pydantic_field.v2.fields import PydanticSchemaField
|
||||
|
||||
# from .models import CustomPlugin
|
||||
|
||||
|
||||
# class PluginForm(forms.ModelForm):
|
||||
# class Meta:
|
||||
# model = CustomPlugin
|
||||
# fields = '__all__'
|
||||
# widgets = {
|
||||
# 'items': JSONFormWidget(schema=PluginSchema),
|
||||
# }
|
||||
|
||||
|
||||
# class PluginAdmin(admin.ModelAdmin):
|
||||
# formfield_overrides = {
|
||||
# PydanticSchemaField: {"widget": JSONFormWidget},
|
||||
# }
|
||||
# form = PluginForm
|
||||
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class PluganticConfig(AppConfig):
|
||||
default_auto_field = 'django.db.models.BigAutoField'
|
||||
name = 'plugantic'
|
|
@ -1,323 +0,0 @@
|
|||
__package__ = 'archivebox.plugantic'
|
||||
|
||||
import sys
|
||||
import inspect
|
||||
import importlib
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
from typing import Any, Optional, Dict, List
|
||||
from typing_extensions import Self
|
||||
from subprocess import run, PIPE
|
||||
|
||||
|
||||
from pydantic_core import ValidationError
|
||||
|
||||
from pydantic import BaseModel, Field, model_validator, computed_field, field_validator, validate_call, field_serializer
|
||||
|
||||
from .binproviders import (
|
||||
SemVer,
|
||||
BinName,
|
||||
BinProviderName,
|
||||
HostBinPath,
|
||||
BinProvider,
|
||||
EnvProvider,
|
||||
AptProvider,
|
||||
BrewProvider,
|
||||
PipProvider,
|
||||
ProviderLookupDict,
|
||||
bin_name,
|
||||
bin_abspath,
|
||||
path_is_script,
|
||||
path_is_executable,
|
||||
)
|
||||
|
||||
|
||||
class Binary(BaseModel):
|
||||
name: BinName
|
||||
description: str = Field(default='')
|
||||
|
||||
providers_supported: List[BinProvider] = Field(default=[EnvProvider()], alias='providers')
|
||||
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = Field(default={}, alias='overrides')
|
||||
|
||||
loaded_provider: Optional[BinProviderName] = Field(default=None, alias='provider')
|
||||
loaded_abspath: Optional[HostBinPath] = Field(default=None, alias='abspath')
|
||||
loaded_version: Optional[SemVer] = Field(default=None, alias='version')
|
||||
|
||||
# bin_filename: see below
|
||||
# is_executable: see below
|
||||
# is_script
|
||||
# is_valid: see below
|
||||
|
||||
|
||||
@model_validator(mode='after')
|
||||
def validate(self):
|
||||
self.loaded_abspath = bin_abspath(self.name) or self.name
|
||||
self.description = self.description or self.name
|
||||
|
||||
assert self.providers_supported, f'No providers were given for package {self.name}'
|
||||
|
||||
# pull in any overrides from the binproviders
|
||||
for provider in self.providers_supported:
|
||||
overrides_by_provider = provider.get_providers_for_bin(self.name)
|
||||
if overrides_by_provider:
|
||||
self.provider_overrides[provider.name] = {
|
||||
**overrides_by_provider,
|
||||
**self.provider_overrides.get(provider.name, {}),
|
||||
}
|
||||
return self
|
||||
|
||||
@field_validator('loaded_abspath', mode='before')
|
||||
def parse_abspath(cls, value: Any):
|
||||
return bin_abspath(value)
|
||||
|
||||
@field_validator('loaded_version', mode='before')
|
||||
def parse_version(cls, value: Any):
|
||||
return value and SemVer(value)
|
||||
|
||||
@field_serializer('provider_overrides', when_used='json')
|
||||
def serialize_overrides(self, provider_overrides: Dict[BinProviderName, ProviderLookupDict]) -> Dict[BinProviderName, Dict[str, str]]:
|
||||
return {
|
||||
provider_name: {
|
||||
key: str(val)
|
||||
for key, val in overrides.items()
|
||||
}
|
||||
for provider_name, overrides in provider_overrides.items()
|
||||
}
|
||||
|
||||
@computed_field # type: ignore[misc] # see mypy issue #1362
|
||||
@property
|
||||
def bin_filename(self) -> BinName:
|
||||
if self.is_script:
|
||||
# e.g. '.../Python.framework/Versions/3.11/lib/python3.11/sqlite3/__init__.py' -> sqlite
|
||||
name = self.name
|
||||
elif self.loaded_abspath:
|
||||
# e.g. '/opt/homebrew/bin/wget' -> wget
|
||||
name = bin_name(self.loaded_abspath)
|
||||
else:
|
||||
# e.g. 'ytdlp' -> 'yt-dlp'
|
||||
name = bin_name(self.name)
|
||||
return name
|
||||
|
||||
@computed_field # type: ignore[misc] # see mypy issue #1362
|
||||
@property
|
||||
def is_executable(self) -> bool:
|
||||
try:
|
||||
assert self.loaded_abspath and path_is_executable(self.loaded_abspath)
|
||||
return True
|
||||
except (ValidationError, AssertionError):
|
||||
return False
|
||||
|
||||
@computed_field # type: ignore[misc] # see mypy issue #1362
|
||||
@property
|
||||
def is_script(self) -> bool:
|
||||
try:
|
||||
assert self.loaded_abspath and path_is_script(self.loaded_abspath)
|
||||
return True
|
||||
except (ValidationError, AssertionError):
|
||||
return False
|
||||
|
||||
@computed_field # type: ignore[misc] # see mypy issue #1362
|
||||
@property
|
||||
def is_valid(self) -> bool:
|
||||
return bool(
|
||||
self.name
|
||||
and self.loaded_abspath
|
||||
and self.loaded_version
|
||||
and (self.is_executable or self.is_script)
|
||||
)
|
||||
|
||||
@validate_call
|
||||
def install(self) -> Self:
|
||||
if not self.providers_supported:
|
||||
return self
|
||||
|
||||
exc = Exception('No providers were able to install binary', self.name, self.providers_supported)
|
||||
for provider in self.providers_supported:
|
||||
try:
|
||||
installed_bin = provider.install(self.name, overrides=self.provider_overrides.get(provider.name))
|
||||
if installed_bin:
|
||||
# print('INSTALLED', self.name, installed_bin)
|
||||
return self.model_copy(update={
|
||||
'loaded_provider': provider.name,
|
||||
'loaded_abspath': installed_bin.abspath,
|
||||
'loaded_version': installed_bin.version,
|
||||
})
|
||||
except Exception as err:
|
||||
print(err)
|
||||
exc = err
|
||||
raise exc
|
||||
|
||||
@validate_call
|
||||
def load(self, cache=True) -> Self:
|
||||
if self.is_valid:
|
||||
return self
|
||||
|
||||
if not self.providers_supported:
|
||||
return self
|
||||
|
||||
exc = Exception('No providers were able to install binary', self.name, self.providers_supported)
|
||||
for provider in self.providers_supported:
|
||||
try:
|
||||
installed_bin = provider.load(self.name, cache=cache, overrides=self.provider_overrides.get(provider.name))
|
||||
if installed_bin:
|
||||
# print('LOADED', provider, self.name, installed_bin)
|
||||
return self.model_copy(update={
|
||||
'loaded_provider': provider.name,
|
||||
'loaded_abspath': installed_bin.abspath,
|
||||
'loaded_version': installed_bin.version,
|
||||
})
|
||||
except Exception as err:
|
||||
print(err)
|
||||
exc = err
|
||||
raise exc
|
||||
|
||||
@validate_call
|
||||
def load_or_install(self, cache=True) -> Self:
|
||||
if self.is_valid:
|
||||
return self
|
||||
|
||||
if not self.providers_supported:
|
||||
return self
|
||||
|
||||
exc = Exception('No providers were able to install binary', self.name, self.providers_supported)
|
||||
for provider in self.providers_supported:
|
||||
try:
|
||||
installed_bin = provider.load_or_install(self.name, overrides=self.provider_overrides.get(provider.name), cache=cache)
|
||||
if installed_bin:
|
||||
# print('LOADED_OR_INSTALLED', self.name, installed_bin)
|
||||
return self.model_copy(update={
|
||||
'loaded_provider': provider.name,
|
||||
'loaded_abspath': installed_bin.abspath,
|
||||
'loaded_version': installed_bin.version,
|
||||
})
|
||||
except Exception as err:
|
||||
print(err)
|
||||
exc = err
|
||||
raise exc
|
||||
|
||||
@validate_call
|
||||
def exec(self, args=(), pwd='.'):
|
||||
assert self.loaded_abspath
|
||||
assert self.loaded_version
|
||||
return run([self.loaded_abspath, *args], stdout=PIPE, stderr=PIPE, pwd=pwd)
|
||||
|
||||
|
||||
|
||||
|
||||
class SystemPythonHelpers:
|
||||
@staticmethod
|
||||
def get_subdeps() -> str:
|
||||
return 'python3 python3-minimal python3-pip python3-virtualenv'
|
||||
|
||||
@staticmethod
|
||||
def get_abspath() -> str:
|
||||
return sys.executable
|
||||
|
||||
@staticmethod
|
||||
def get_version() -> str:
|
||||
return '{}.{}.{}'.format(*sys.version_info[:3])
|
||||
|
||||
|
||||
class SqliteHelpers:
|
||||
@staticmethod
|
||||
def get_abspath() -> Path:
|
||||
import sqlite3
|
||||
importlib.reload(sqlite3)
|
||||
return Path(inspect.getfile(sqlite3))
|
||||
|
||||
@staticmethod
|
||||
def get_version() -> SemVer:
|
||||
import sqlite3
|
||||
importlib.reload(sqlite3)
|
||||
version = sqlite3.version
|
||||
assert version
|
||||
return SemVer(version)
|
||||
|
||||
class DjangoHelpers:
|
||||
@staticmethod
|
||||
def get_django_abspath() -> str:
|
||||
import django
|
||||
return inspect.getfile(django)
|
||||
|
||||
|
||||
@staticmethod
|
||||
def get_django_version() -> str:
|
||||
import django
|
||||
return '{}.{}.{} {} ({})'.format(*django.VERSION)
|
||||
|
||||
class YtdlpHelpers:
|
||||
@staticmethod
|
||||
def get_ytdlp_subdeps() -> str:
|
||||
return 'yt-dlp ffmpeg'
|
||||
|
||||
@staticmethod
|
||||
def get_ytdlp_version() -> str:
|
||||
import yt_dlp
|
||||
importlib.reload(yt_dlp)
|
||||
|
||||
version = yt_dlp.version.__version__
|
||||
assert version
|
||||
return version
|
||||
|
||||
class PythonBinary(Binary):
|
||||
name: BinName = 'python'
|
||||
|
||||
providers_supported: List[BinProvider] = [
|
||||
EnvProvider(
|
||||
subdeps_provider={'python': 'plugantic.binaries.SystemPythonHelpers.get_subdeps'},
|
||||
abspath_provider={'python': 'plugantic.binaries.SystemPythonHelpers.get_abspath'},
|
||||
version_provider={'python': 'plugantic.binaries.SystemPythonHelpers.get_version'},
|
||||
),
|
||||
]
|
||||
|
||||
class SqliteBinary(Binary):
|
||||
name: BinName = 'sqlite'
|
||||
providers_supported: List[BinProvider] = [
|
||||
EnvProvider(
|
||||
version_provider={'sqlite': 'plugantic.binaries.SqliteHelpers.get_version'},
|
||||
abspath_provider={'sqlite': 'plugantic.binaries.SqliteHelpers.get_abspath'},
|
||||
),
|
||||
]
|
||||
|
||||
class DjangoBinary(Binary):
|
||||
name: BinName = 'django'
|
||||
providers_supported: List[BinProvider] = [
|
||||
EnvProvider(
|
||||
abspath_provider={'django': 'plugantic.binaries.DjangoHelpers.get_django_abspath'},
|
||||
version_provider={'django': 'plugantic.binaries.DjangoHelpers.get_django_version'},
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class YtdlpBinary(Binary):
|
||||
name: BinName = 'yt-dlp'
|
||||
providers_supported: List[BinProvider] = [
|
||||
# EnvProvider(),
|
||||
PipProvider(version_provider={'yt-dlp': 'plugantic.binaries.YtdlpHelpers.get_ytdlp_version'}),
|
||||
BrewProvider(subdeps_provider={'yt-dlp': 'plugantic.binaries.YtdlpHelpers.get_ytdlp_subdeps'}),
|
||||
# AptProvider(subdeps_provider={'yt-dlp': lambda: 'yt-dlp ffmpeg'}),
|
||||
]
|
||||
|
||||
|
||||
class WgetBinary(Binary):
|
||||
name: BinName = 'wget'
|
||||
providers_supported: List[BinProvider] = [EnvProvider(), AptProvider()]
|
||||
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# PYTHON_BINARY = PythonBinary()
|
||||
# SQLITE_BINARY = SqliteBinary()
|
||||
# DJANGO_BINARY = DjangoBinary()
|
||||
# WGET_BINARY = WgetBinary()
|
||||
# YTDLP_BINARY = YtdlpPBinary()
|
||||
|
||||
# print('-------------------------------------DEFINING BINARIES---------------------------------')
|
||||
# print(PYTHON_BINARY)
|
||||
# print(SQLITE_BINARY)
|
||||
# print(DJANGO_BINARY)
|
||||
# print(WGET_BINARY)
|
||||
# print(YTDLP_BINARY)
|
|
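A hypothetical usage sketch of the plugantic Binary classes defined above (removed on the v0.7.2 side), mirroring the commented-out __main__ block:

# Illustration only: declare a binary and resolve it via its providers.
wget = WgetBinary()                 # may come from EnvProvider() or AptProvider(), per the class above
wget = wget.load_or_install()       # find it on $PATH (or install it) and record abspath/version
print(wget.loaded_provider, wget.loaded_abspath, wget.loaded_version)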
@ -1,561 +0,0 @@
|
|||
__package__ = 'archivebox.plugantic'
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import operator
|
||||
|
||||
from typing import Callable, Any, Optional, Type, Dict, Annotated, ClassVar, Literal, cast, TYPE_CHECKING
|
||||
from typing_extensions import Self
|
||||
from abc import ABC, abstractmethod
|
||||
from collections import namedtuple
|
||||
from pathlib import Path
|
||||
from subprocess import run, PIPE
|
||||
|
||||
from pydantic_core import core_schema, ValidationError
|
||||
from pydantic import BaseModel, Field, TypeAdapter, AfterValidator, validate_call, GetCoreSchemaHandler
|
||||
|
||||
|
||||
|
||||
def func_takes_args_or_kwargs(lambda_func: Callable[..., Any]) -> bool:
|
||||
"""returns True if a lambda func takes args/kwargs of any kind, otherwise false if it's pure/argless"""
|
||||
code = lambda_func.__code__
|
||||
has_args = code.co_argcount > 0
|
||||
has_varargs = code.co_flags & 0x04 != 0
|
||||
has_varkw = code.co_flags & 0x08 != 0
|
||||
return has_args or has_varargs or has_varkw
|
||||
|
||||
|
||||
def is_semver_str(semver: Any) -> bool:
|
||||
if isinstance(semver, str):
|
||||
return (semver.count('.') == 2 and semver.replace('.', '').isdigit())
|
||||
return False
|
||||
|
||||
def semver_to_str(semver: tuple[int, int, int] | str) -> str:
|
||||
if isinstance(semver, (list, tuple)):
|
||||
return '.'.join(str(chunk) for chunk in semver)
|
||||
if is_semver_str(semver):
|
||||
return semver
|
||||
raise ValidationError('Tried to convert invalid SemVer: {}'.format(semver))
|
||||
|
||||
|
||||
SemVerTuple = namedtuple('SemVerTuple', ('major', 'minor', 'patch'), defaults=(0, 0, 0))
|
||||
SemVerParsableTypes = str | tuple[str | int, ...] | list[str | int]
|
||||
|
||||
class SemVer(SemVerTuple):
|
||||
major: int
|
||||
minor: int = 0
|
||||
patch: int = 0
|
||||
|
||||
if TYPE_CHECKING:
|
||||
full_text: str | None = ''
|
||||
|
||||
def __new__(cls, *args, full_text=None, **kwargs):
|
||||
# '1.1.1'
|
||||
if len(args) == 1 and is_semver_str(args[0]):
|
||||
result = SemVer.parse(args[0])
|
||||
|
||||
# ('1', '2', '3')
|
||||
elif len(args) == 1 and isinstance(args[0], (tuple, list)):
|
||||
result = SemVer.parse(args[0])
|
||||
|
||||
# (1, '2', None)
|
||||
elif not all(isinstance(arg, (int, type(None))) for arg in args):
|
||||
result = SemVer.parse(args)
|
||||
|
||||
# (None)
|
||||
elif all(chunk in ('', 0, None) for chunk in (*args, *kwargs.values())):
|
||||
result = None
|
||||
|
||||
# 1, 2, 3
|
||||
else:
|
||||
result = SemVerTuple.__new__(cls, *args, **kwargs)
|
||||
|
||||
if result is not None:
|
||||
# add first line as extra hidden metadata so it can be logged without having to re-run version cmd
|
||||
result.full_text = full_text or str(result)
|
||||
return result
|
||||
|
||||
@classmethod
|
||||
def parse(cls, version_stdout: SemVerParsableTypes) -> Self | None:
|
||||
"""
|
||||
parses a version tag string like the examples below into (major, minor, patch) ints
|
||||
'Google Chrome 124.0.6367.208' -> (124, 0, 6367)
|
||||
'GNU Wget 1.24.5 built on darwin23.2.0.' -> (1, 24, 5)
|
||||
'curl 8.4.0 (x86_64-apple-darwin23.0) ...' -> (8, 4, 0)
|
||||
'2024.04.09' -> (2024, 4, 9)
|
||||
|
||||
"""
|
||||
# print('INITIAL_VALUE', type(version_stdout).__name__, version_stdout)
|
||||
|
||||
if isinstance(version_stdout, (tuple, list)):
|
||||
version_stdout = '.'.join(str(chunk) for chunk in version_stdout)
|
||||
elif isinstance(version_stdout, bytes):
|
||||
version_stdout = version_stdout.decode()
|
||||
elif not isinstance(version_stdout, str):
|
||||
version_stdout = str(version_stdout)
|
||||
|
||||
# no text to work with, return None immediately
|
||||
if not version_stdout.strip():
|
||||
# raise Exception('Tried to parse semver from empty version output (is binary installed and available?)')
|
||||
return None
|
||||
|
||||
just_numbers = lambda col: col.lower().strip('v').split('+')[0].split('-')[0].split('_')[0]
|
||||
contains_semver = lambda col: (
|
||||
col.count('.') in (1, 2, 3)
|
||||
and all(chunk.isdigit() for chunk in col.split('.')[:3]) # first 3 chunks can only be nums
|
||||
)
|
||||
|
||||
full_text = version_stdout.split('\n')[0].strip()
|
||||
first_line_columns = full_text.split()[:4]
|
||||
version_columns = list(filter(contains_semver, map(just_numbers, first_line_columns)))
|
||||
|
||||
# could not find any column of first line that looks like a version number, despite there being some text
|
||||
if not version_columns:
|
||||
# raise Exception('Failed to parse semver from version command output: {}'.format(' '.join(first_line_columns)))
|
||||
return None
|
||||
|
||||
# take first col containing a semver, and truncate it to 3 chunks (e.g. 2024.04.09.91) -> (2024, 04, 09)
|
||||
first_version_tuple = version_columns[0].split('.', 3)[:3]
|
||||
|
||||
# print('FINAL_VALUE', first_version_tuple)
|
||||
|
||||
return cls(*(int(chunk) for chunk in first_version_tuple), full_text=full_text)
|
||||
|
||||
def __str__(self):
|
||||
return '.'.join(str(chunk) for chunk in self)
|
||||
|
||||
# @classmethod
|
||||
# def __get_pydantic_core_schema__(cls, source: Type[Any], handler: GetCoreSchemaHandler) -> core_schema.CoreSchema:
|
||||
# default_schema = handler(source)
|
||||
# return core_schema.no_info_after_validator_function(
|
||||
# cls.parse,
|
||||
# default_schema,
|
||||
# serialization=core_schema.plain_serializer_function_ser_schema(
|
||||
# lambda semver: str(semver),
|
||||
# info_arg=False,
|
||||
# return_schema=core_schema.str_schema(),
|
||||
# ),
|
||||
# )
|
||||
|
||||
assert SemVer(None) == None
|
||||
assert SemVer('') == None
|
||||
assert SemVer.parse('') == None
|
||||
assert SemVer(1) == (1, 0, 0)
|
||||
assert SemVer(1, 2) == (1, 2, 0)
|
||||
assert SemVer('1.2+234234') == (1, 2, 0)
|
||||
assert SemVer((1, 2, 3)) == (1, 2, 3)
|
||||
assert getattr(SemVer((1, 2, 3)), 'full_text') == '1.2.3'
|
||||
assert SemVer(('1', '2', '3')) == (1, 2, 3)
|
||||
assert SemVer.parse('5.6.7') == (5, 6, 7)
|
||||
assert SemVer.parse('124.0.6367.208') == (124, 0, 6367)
|
||||
assert SemVer.parse('Google Chrome 124.1+234.234') == (124, 1, 0)
|
||||
assert SemVer.parse('Google Ch1rome 124.0.6367.208') == (124, 0, 6367)
|
||||
assert SemVer.parse('Google Chrome 124.0.6367.208+beta_234. 234.234.123\n123.456.324') == (124, 0, 6367)
|
||||
assert getattr(SemVer.parse('Google Chrome 124.0.6367.208+beta_234. 234.234.123\n123.456.324'), 'full_text') == 'Google Chrome 124.0.6367.208+beta_234. 234.234.123'
|
||||
assert SemVer.parse('Google Chrome') == None
|
||||
|
||||
@validate_call
|
||||
def bin_name(bin_path_or_name: str | Path) -> str:
|
||||
name = Path(bin_path_or_name).name
|
||||
assert len(name) > 1
|
||||
assert name.replace('-', '').replace('_', '').replace('.', '').isalnum(), (
|
||||
f'Binary name can only contain A-Za-z0-9, dashes, underscores, and dots: {name}')
|
||||
return name
|
||||
|
||||
BinName = Annotated[str, AfterValidator(bin_name)]
|
||||
|
||||
@validate_call
|
||||
def path_is_file(path: Path | str) -> Path:
|
||||
path = Path(path) if isinstance(path, str) else path
|
||||
assert path.is_file(), f'Path is not a file: {path}'
|
||||
return path
|
||||
|
||||
HostExistsPath = Annotated[Path, AfterValidator(path_is_file)]
|
||||
|
||||
@validate_call
|
||||
def path_is_executable(path: HostExistsPath) -> HostExistsPath:
|
||||
assert os.access(path, os.X_OK), f'Path is not executable (fix by running chmod +x {path})'
|
||||
return path
|
||||
|
||||
@validate_call
|
||||
def path_is_script(path: HostExistsPath) -> HostExistsPath:
|
||||
SCRIPT_EXTENSIONS = ('.py', '.js', '.sh')
|
||||
assert path.suffix.lower() in SCRIPT_EXTENSIONS, 'Path is not a script (does not end in {})'.format(', '.join(SCRIPT_EXTENSIONS))
|
||||
return path
|
||||
|
||||
HostExecutablePath = Annotated[HostExistsPath, AfterValidator(path_is_executable)]
|
||||
|
||||
@validate_call
|
||||
def path_is_abspath(path: Path) -> Path:
|
||||
return path.resolve()
|
||||
|
||||
HostAbsPath = Annotated[HostExistsPath, AfterValidator(path_is_abspath)]
|
||||
HostBinPath = Annotated[Path, AfterValidator(path_is_abspath), AfterValidator(path_is_file)]
|
||||
|
||||
|
||||
@validate_call
|
||||
def bin_abspath(bin_path_or_name: BinName | Path) -> HostBinPath | None:
|
||||
assert bin_path_or_name
|
||||
|
||||
if str(bin_path_or_name).startswith('/'):
|
||||
# already a path, get its absolute form
|
||||
abspath = Path(bin_path_or_name).resolve()
|
||||
else:
|
||||
# not a path yet, get path using os.which
|
||||
binpath = shutil.which(bin_path_or_name)
|
||||
if not binpath:
|
||||
return None
|
||||
abspath = Path(binpath).resolve()
|
||||
|
||||
try:
|
||||
return TypeAdapter(HostBinPath).validate_python(abspath)
|
||||
except ValidationError:
|
||||
return None
|
||||
|
||||
|
||||
@validate_call
|
||||
def bin_version(bin_path: HostBinPath, args=('--version',)) -> SemVer | None:
|
||||
return SemVer(run([bin_path, *args], stdout=PIPE).stdout.strip().decode())
|
||||
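# minimal usage sketch (assumes wget is installed somewhere on PATH; not part of the original diff):
# wget_path = bin_abspath('wget')                       # e.g. PosixPath('/usr/bin/wget') or None
# wget_version = wget_path and bin_version(wget_path)   # e.g. SemVer(1, 24, 5), parsed from `wget --version` output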
|
||||
|
||||
class InstalledBin(BaseModel):
|
||||
abspath: HostBinPath
|
||||
version: SemVer
|
||||
|
||||
|
||||
def is_valid_install_string(pkgs_str: str) -> str:
|
||||
"""Make sure a string is a valid install string for a package manager, e.g. 'yt-dlp ffmpeg'"""
|
||||
assert pkgs_str
|
||||
assert all(len(pkg) > 1 for pkg in pkgs_str.split(' '))
|
||||
return pkgs_str
|
||||
|
||||
def is_valid_python_dotted_import(import_str: str) -> str:
|
||||
assert import_str and import_str.replace('.', '').replace('_', '').isalnum()
|
||||
return import_str
|
||||
|
||||
InstallStr = Annotated[str, AfterValidator(is_valid_install_string)]
|
||||
|
||||
LazyImportStr = Annotated[str, AfterValidator(is_valid_python_dotted_import)]
|
||||
|
||||
ProviderHandler = Callable[..., Any] | Callable[[], Any] # must take no args [], or [bin_name: str, **kwargs]
|
||||
#ProviderHandlerStr = Annotated[str, AfterValidator(lambda s: s.startswith('self.'))]
|
||||
ProviderHandlerRef = LazyImportStr | ProviderHandler
|
||||
ProviderLookupDict = Dict[str, LazyImportStr]
|
||||
ProviderType = Literal['abspath', 'version', 'subdeps', 'install']
|
||||
|
||||
|
||||
# class Host(BaseModel):
|
||||
# machine: str
|
||||
# system: str
|
||||
# platform: str
|
||||
# in_docker: bool
|
||||
# in_qemu: bool
|
||||
# python: str
|
||||
|
||||
BinProviderName = Literal['env', 'pip', 'apt', 'brew', 'npm', 'vendor']
|
||||
|
||||
|
||||
class BinProvider(ABC, BaseModel):
|
||||
name: BinProviderName
|
||||
|
||||
abspath_provider: ProviderLookupDict = Field(default={'*': 'self.on_get_abspath'}, exclude=True)
|
||||
version_provider: ProviderLookupDict = Field(default={'*': 'self.on_get_version'}, exclude=True)
|
||||
subdeps_provider: ProviderLookupDict = Field(default={'*': 'self.on_get_subdeps'}, exclude=True)
|
||||
install_provider: ProviderLookupDict = Field(default={'*': 'self.on_install'}, exclude=True)
|
||||
|
||||
_abspath_cache: ClassVar = {}
|
||||
_version_cache: ClassVar = {}
|
||||
_install_cache: ClassVar = {}
|
||||
|
||||
# def provider_version(self) -> SemVer | None:
|
||||
# """Version of the actual underlying package manager (e.g. pip v20.4.1)"""
|
||||
# if self.name in ('env', 'vendor'):
|
||||
# return SemVer('0.0.0')
|
||||
# installer_binpath = Path(shutil.which(self.name)).resolve()
|
||||
# return bin_version(installer_binpath)
|
||||
|
||||
# def provider_host(self) -> Host:
|
||||
# """Information about the host env, archictecture, and OS needed to select & build packages"""
|
||||
# p = platform.uname()
|
||||
# return Host(
|
||||
# machine=p.machine,
|
||||
# system=p.system,
|
||||
# platform=platform.platform(),
|
||||
# python=sys.implementation.name,
|
||||
# in_docker=os.environ.get('IN_DOCKER', '').lower() == 'true',
|
||||
# in_qemu=os.environ.get('IN_QEMU', '').lower() == 'true',
|
||||
# )
|
||||
|
||||
def get_default_providers(self):
|
||||
return self.get_providers_for_bin('*')
|
||||
|
||||
def resolve_provider_func(self, provider_func: ProviderHandlerRef | None) -> ProviderHandler | None:
|
||||
if provider_func is None:
|
||||
return None
|
||||
|
||||
# if provider_func is a dotted path to a function on self, swap it for the actual function
|
||||
if isinstance(provider_func, str) and provider_func.startswith('self.'):
|
||||
provider_func = getattr(self, provider_func.split('self.', 1)[-1])
|
||||
|
||||
# if provider_func is a dot-formatted import string, import the function
|
||||
if isinstance(provider_func, str):
|
||||
from django.utils.module_loading import import_string
|
||||
|
||||
package_name, module_name, classname, path = provider_func.split('.', 3) # -> abc, def, ghi, jkl
|
||||
|
||||
# get .ghi.jkl nested attr present on module abc.def
|
||||
imported_module = import_string(f'{package_name}.{module_name}.{classname}')
|
||||
provider_func = operator.attrgetter(path)(imported_module)
|
||||
|
||||
# # abc.def.ghi.jkl -> 1, 2, 3
|
||||
# for idx in range(1, len(path)):
|
||||
# parent_path = '.'.join(path[:-idx]) # abc.def.ghi
|
||||
# try:
|
||||
# parent_module = import_string(parent_path)
|
||||
# provider_func = getattr(parent_module, path[-idx])
|
||||
# except AttributeError, ImportError:
|
||||
# continue
|
||||
|
||||
assert TypeAdapter(ProviderHandler).validate_python(provider_func), (
|
||||
f'{self.__class__.__name__} provider func must be a function or dotted-import path, not: {provider_func}')
|
||||
|
||||
return provider_func
|
||||
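# e.g. (sketch): 'self.on_get_version' resolves to the bound method on this provider instance,
# while a dotted path like 'mypackage.mymodule.MyClass.my_handler' (hypothetical) is imported lazily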
|
||||
@validate_call
|
||||
def get_providers_for_bin(self, bin_name: str) -> ProviderLookupDict:
|
||||
providers_for_bin = {
|
||||
'abspath': self.abspath_provider.get(bin_name),
|
||||
'version': self.version_provider.get(bin_name),
|
||||
'subdeps': self.subdeps_provider.get(bin_name),
|
||||
'install': self.install_provider.get(bin_name),
|
||||
}
|
||||
only_set_providers_for_bin = {k: v for k, v in providers_for_bin.items() if v is not None}
|
||||
|
||||
return only_set_providers_for_bin
|
||||
|
||||
@validate_call
|
||||
def get_provider_for_action(self, bin_name: BinName, provider_type: ProviderType, default_provider: Optional[ProviderHandlerRef]=None, overrides: Optional[ProviderLookupDict]=None) -> ProviderHandler:
|
||||
"""
|
||||
Get the provider func for a given key + Dict of provider callbacks + fallback default provider.
|
||||
e.g. get_provider_for_action(bin_name='yt-dlp', provider_type='install', default_provider=self.on_install, ...) -> Callable
|
||||
"""
|
||||
|
||||
provider_func_ref = (
|
||||
(overrides or {}).get(provider_type)
|
||||
or self.get_providers_for_bin(bin_name).get(provider_type)
|
||||
or self.get_default_providers().get(provider_type)
|
||||
or default_provider
|
||||
)
|
||||
# print('getting provider for action', bin_name, provider_type, provider_func)
|
||||
|
||||
provider_func = self.resolve_provider_func(provider_func_ref)
|
||||
|
||||
assert provider_func, f'No {self.name} provider func was found for {bin_name} in: {self.__class__.__name__}.'
|
||||
|
||||
return provider_func
|
||||
|
||||
@validate_call
|
||||
def call_provider_for_action(self, bin_name: BinName, provider_type: ProviderType, default_provider: Optional[ProviderHandlerRef]=None, overrides: Optional[ProviderLookupDict]=None, **kwargs) -> Any:
|
||||
provider_func: ProviderHandler = self.get_provider_for_action(
|
||||
bin_name=bin_name,
|
||||
provider_type=provider_type,
|
||||
default_provider=default_provider,
|
||||
overrides=overrides,
|
||||
)
|
||||
if not func_takes_args_or_kwargs(provider_func):
|
||||
# if it's a pure argless lambda, don't pass bin_name or other **kwargs
|
||||
provider_func_without_args = cast(Callable[[], Any], provider_func)
|
||||
return provider_func_without_args()
|
||||
|
||||
provider_func = cast(Callable[..., Any], provider_func)
|
||||
return provider_func(bin_name, **kwargs)
|
||||
|
||||
|
||||
|
||||
def on_get_abspath(self, bin_name: BinName, **_) -> HostBinPath | None:
|
||||
print(f'[*] {self.__class__.__name__}: Getting abspath for {bin_name}...')
|
||||
try:
|
||||
return bin_abspath(bin_name)
|
||||
except ValidationError:
|
||||
return None
|
||||
|
||||
def on_get_version(self, bin_name: BinName, abspath: Optional[HostBinPath]=None, **_) -> SemVer | None:
|
||||
abspath = abspath or self._abspath_cache.get(bin_name) or self.get_abspath(bin_name)
|
||||
if not abspath: return None
|
||||
|
||||
print(f'[*] {self.__class__.__name__}: Getting version for {bin_name}...')
|
||||
try:
|
||||
return bin_version(abspath)
|
||||
except ValidationError:
|
||||
return None
|
||||
|
||||
def on_get_subdeps(self, bin_name: BinName, **_) -> InstallStr:
|
||||
print(f'[*] {self.__class__.__name__}: Getting subdependencies for {bin_name}')
|
||||
# ... subdependency calculation logic here
|
||||
return TypeAdapter(InstallStr).validate_python(bin_name)
|
||||
|
||||
@abstractmethod
|
||||
def on_install(self, bin_name: BinName, subdeps: Optional[InstallStr]=None, **_):
|
||||
subdeps = subdeps or self.get_subdeps(bin_name)
|
||||
print(f'[*] {self.__class__.__name__}: Installing subdependencies for {bin_name} ({subdeps})')
|
||||
# ... install logic here
|
||||
assert True
|
||||
|
||||
|
||||
@validate_call
|
||||
def get_abspath(self, bin_name: BinName, overrides: Optional[ProviderLookupDict]=None) -> HostBinPath | None:
|
||||
abspath = self.call_provider_for_action(
|
||||
bin_name=bin_name,
|
||||
provider_type='abspath',
|
||||
default_provider=self.on_get_abspath,
|
||||
overrides=overrides,
|
||||
)
|
||||
if not abspath:
|
||||
return None
|
||||
result = TypeAdapter(HostBinPath).validate_python(abspath)
|
||||
self._abspath_cache[bin_name] = result
|
||||
return result
|
||||
|
||||
@validate_call
|
||||
def get_version(self, bin_name: BinName, abspath: Optional[HostBinPath]=None, overrides: Optional[ProviderLookupDict]=None) -> SemVer | None:
|
||||
version = self.call_provider_for_action(
|
||||
bin_name=bin_name,
|
||||
provider_type='version',
|
||||
default_provider=self.on_get_version,
|
||||
overrides=overrides,
|
||||
abspath=abspath,
|
||||
)
|
||||
if not version:
|
||||
return None
|
||||
result = SemVer(version)
|
||||
self._version_cache[bin_name] = result
|
||||
return result
|
||||
|
||||
@validate_call
|
||||
def get_subdeps(self, bin_name: BinName, overrides: Optional[ProviderLookupDict]=None) -> InstallStr:
|
||||
subdeps = self.call_provider_for_action(
|
||||
bin_name=bin_name,
|
||||
provider_type='subdeps',
|
||||
default_provider=self.on_get_subdeps,
|
||||
overrides=overrides,
|
||||
)
|
||||
if not subdeps:
|
||||
subdeps = bin_name
|
||||
result = TypeAdapter(InstallStr).validate_python(subdeps)
|
||||
return result
|
||||
|
||||
@validate_call
|
||||
def install(self, bin_name: BinName, overrides: Optional[ProviderLookupDict]=None) -> InstalledBin | None:
|
||||
subdeps = self.get_subdeps(bin_name, overrides=overrides)
|
||||
|
||||
self.call_provider_for_action(
|
||||
bin_name=bin_name,
|
||||
provider_type='install',
|
||||
default_provider=self.on_install,
|
||||
overrides=overrides,
|
||||
subdeps=subdeps,
|
||||
)
|
||||
|
||||
installed_abspath = self.get_abspath(bin_name)
|
||||
assert installed_abspath, f'Unable to find {bin_name} abspath after installing with {self.name}'
|
||||
|
||||
installed_version = self.get_version(bin_name, abspath=installed_abspath)
|
||||
assert installed_version, f'Unable to find {bin_name} version after installing with {self.name}'
|
||||
|
||||
result = InstalledBin(abspath=installed_abspath, version=installed_version)
|
||||
self._install_cache[bin_name] = result
|
||||
return result
|
||||
|
||||
@validate_call
|
||||
def load(self, bin_name: BinName, overrides: Optional[ProviderLookupDict]=None, cache: bool=False) -> InstalledBin | None:
|
||||
installed_abspath = None
|
||||
installed_version = None
|
||||
|
||||
if cache:
|
||||
installed_bin = self._install_cache.get(bin_name)
|
||||
if installed_bin:
|
||||
return installed_bin
|
||||
installed_abspath = self._abspath_cache.get(bin_name)
|
||||
installed_version = self._version_cache.get(bin_name)
|
||||
|
||||
|
||||
installed_abspath = installed_abspath or self.get_abspath(bin_name, overrides=overrides)
|
||||
if not installed_abspath:
|
||||
return None
|
||||
|
||||
installed_version = installed_version or self.get_version(bin_name, abspath=installed_abspath, overrides=overrides)
|
||||
if not installed_version:
|
||||
return None
|
||||
|
||||
return InstalledBin(abspath=installed_abspath, version=installed_version)
|
||||
|
||||
@validate_call
|
||||
def load_or_install(self, bin_name: BinName, overrides: Optional[ProviderLookupDict]=None, cache: bool=True) -> InstalledBin | None:
|
||||
installed = self.load(bin_name, overrides=overrides, cache=cache)
|
||||
if not installed:
|
||||
installed = self.install(bin_name, overrides=overrides)
|
||||
return installed
|
||||
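# usage sketch for the abstract flow above (assumes apt is available; not in the original file):
#   apt = AptProvider()
#   wget = apt.load_or_install('wget')      # InstalledBin(abspath=..., version=...) or None
#   ytdlp = apt.load_or_install('yt-dlp')   # subdeps_provider below maps yt-dlp -> 'yt-dlp ffmpeg'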
|
||||
|
||||
class PipProvider(BinProvider):
|
||||
name: BinProviderName = 'pip'
|
||||
|
||||
def on_install(self, bin_name: str, subdeps: Optional[InstallStr]=None, **_):
|
||||
subdeps = subdeps or self.on_get_subdeps(bin_name)
|
||||
print(f'[*] {self.__class__.__name__}: Installing subdependencies for {bin_name} ({subdeps})')
|
||||
|
||||
proc = run(['pip', 'install', '--upgrade', *subdeps.split(' ')], stdout=PIPE, stderr=PIPE)
|
||||
|
||||
if proc.returncode != 0:
|
||||
print(proc.stdout.strip().decode())
|
||||
print(proc.stderr.strip().decode())
|
||||
raise Exception(f'{self.__class__.__name__}: install got returncode {proc.returncode} while installing {subdeps}')
|
||||
|
||||
|
||||
class AptProvider(BinProvider):
|
||||
name: BinProviderName = 'apt'
|
||||
|
||||
subdeps_provider: ProviderLookupDict = {
|
||||
'yt-dlp': lambda: 'yt-dlp ffmpeg',
|
||||
}
|
||||
|
||||
def on_install(self, bin_name: BinName, subdeps: Optional[InstallStr]=None, **_):
|
||||
subdeps = subdeps or self.on_get_subdeps(bin_name)
|
||||
print(f'[*] {self.__class__.__name__}: Installing subdependencies for {bin_name} ({subdeps})')
|
||||
|
||||
run(['apt-get', 'update', '-qq'])
|
||||
proc = run(['apt-get', 'install', '-y', *subdeps.split(' ')], stdout=PIPE, stderr=PIPE)
|
||||
|
||||
if proc.returncode != 0:
|
||||
print(proc.stdout.strip().decode())
|
||||
print(proc.stderr.strip().decode())
|
||||
raise Exception(f'{self.__class__.__name__} install got returncode {proc.returncode} while installing {subdeps}')
|
||||
|
||||
class BrewProvider(BinProvider):
|
||||
name: BinProviderName = 'brew'
|
||||
|
||||
def on_install(self, bin_name: str, subdeps: Optional[InstallStr]=None, **_):
|
||||
subdeps = subdeps or self.on_get_subdeps(bin_name)
|
||||
print(f'[*] {self.__class__.__name__}: Installing subdependencies for {bin_name} ({subdeps})')
|
||||
|
||||
proc = run(['brew', 'install', *subdeps.split(' ')], stdout=PIPE, stderr=PIPE)
|
||||
|
||||
if proc.returncode != 0:
|
||||
print(proc.stdout.strip().decode())
|
||||
print(proc.stderr.strip().decode())
|
||||
raise Exception(f'{self.__class__.__name__} install got returncode {proc.returncode} while installing {subdeps}')
|
||||
|
||||
|
||||
class EnvProvider(BinProvider):
|
||||
name: BinProviderName = 'env'
|
||||
|
||||
abspath_provider: ProviderLookupDict = {
|
||||
# 'python': lambda: Path('/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/bin/python3.10'),
|
||||
}
|
||||
version_provider: ProviderLookupDict = {
|
||||
# 'python': lambda: '{}.{}.{}'.format(*sys.version_info[:3]),
|
||||
}
|
||||
|
||||
def on_install(self, bin_name: BinName, subdeps: Optional[InstallStr]=None, **_):
|
||||
"""The env provider is ready-only and does not install any packages, so this is a no-op"""
|
||||
pass
|
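# end-to-end sketch (an assumption about intended use, not from the original diff):
# try the read-only env provider first, then fall back to a real package manager
#   for provider in (EnvProvider(), AptProvider()):
#       installed = provider.load_or_install('wget')
#       if installed:
#           break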
|
@ -1,53 +0,0 @@
|
|||
__package__ = 'archivebox.plugantic'
|
||||
|
||||
|
||||
from typing import Optional, List, Literal
|
||||
from pathlib import Path
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
ConfigSectionName = Literal['GENERAL_CONFIG', 'ARCHIVE_METHOD_TOGGLES', 'ARCHIVE_METHOD_OPTIONS', 'DEPENDENCY_CONFIG']
|
||||
|
||||
|
||||
class ConfigSet(BaseModel):
|
||||
section: ConfigSectionName = 'GENERAL_CONFIG'
|
||||
|
||||
class WgetToggleConfig(ConfigSet):
|
||||
section: ConfigSectionName = 'ARCHIVE_METHOD_TOGGLES'
|
||||
|
||||
SAVE_WGET: bool = True
|
||||
SAVE_WARC: bool = True
|
||||
|
||||
class WgetDependencyConfig(ConfigSet):
|
||||
section: ConfigSectionName = 'DEPENDENCY_CONFIG'
|
||||
|
||||
WGET_BINARY: str = Field(default='wget')
|
||||
WGET_ARGS: Optional[List[str]] = Field(default=None)
|
||||
WGET_EXTRA_ARGS: List[str] = []
|
||||
WGET_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
|
||||
|
||||
class WgetOptionsConfig(ConfigSet):
|
||||
section: ConfigSectionName = 'ARCHIVE_METHOD_OPTIONS'
|
||||
|
||||
# loaded from shared config
|
||||
WGET_AUTO_COMPRESSION: bool = Field(default=True)
|
||||
SAVE_WGET_REQUISITES: bool = Field(default=True)
|
||||
WGET_USER_AGENT: str = Field(default='', alias='USER_AGENT')
|
||||
WGET_TIMEOUT: int = Field(default=60, alias='TIMEOUT')
|
||||
WGET_CHECK_SSL_VALIDITY: bool = Field(default=True, alias='CHECK_SSL_VALIDITY')
|
||||
WGET_RESTRICT_FILE_NAMES: str = Field(default='windows', alias='RESTRICT_FILE_NAMES')
|
||||
WGET_COOKIES_FILE: Optional[Path] = Field(default=None, alias='COOKIES_FILE')
|
||||
|
||||
|
||||
CONFIG = {
|
||||
'CHECK_SSL_VALIDITY': False,
|
||||
'SAVE_WARC': False,
|
||||
'TIMEOUT': 999,
|
||||
}
|
||||
|
||||
|
||||
WGET_CONFIG = [
|
||||
WgetToggleConfig(**CONFIG),
|
||||
WgetDependencyConfig(**CONFIG),
|
||||
WgetOptionsConfig(**CONFIG),
|
||||
]
|
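# illustration (sketch, not in the original file): the Field aliases above let the shared
# CONFIG keys populate the wget-specific fields when the ConfigSets are instantiated, e.g.
#   WgetOptionsConfig(**CONFIG).WGET_TIMEOUT             == 999    (via the TIMEOUT alias)
#   WgetOptionsConfig(**CONFIG).WGET_CHECK_SSL_VALIDITY  is False  (via the CHECK_SSL_VALIDITY alias)
#   WgetToggleConfig(**CONFIG).SAVE_WARC                 is False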
|
@ -1,118 +0,0 @@
|
|||
__package__ = 'archivebox.plugantic'
|
||||
|
||||
from typing import Optional, List, Literal, Annotated, Dict, Any
|
||||
from typing_extensions import Self
|
||||
|
||||
from abc import ABC
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic import BaseModel, model_validator, field_serializer, AfterValidator
|
||||
|
||||
from .binaries import (
|
||||
Binary,
|
||||
YtdlpBinary,
|
||||
WgetBinary,
|
||||
)
|
||||
|
||||
|
||||
# stubs
|
||||
class Snapshot:
|
||||
pass
|
||||
|
||||
class ArchiveResult:
|
||||
pass
|
||||
|
||||
def get_wget_output_path(*args, **kwargs) -> Path:
|
||||
return Path('.').resolve()
|
||||
|
||||
|
||||
|
||||
def no_empty_args(args: List[str]) -> List[str]:
|
||||
assert all(len(arg) for arg in args)
|
||||
return args
|
||||
|
||||
ExtractorName = Literal['wget', 'warc', 'media']
|
||||
|
||||
HandlerFuncStr = Annotated[str, AfterValidator(lambda s: s.startswith('self.'))]
|
||||
CmdArgsList = Annotated[List[str], AfterValidator(no_empty_args)]
|
||||
|
||||
|
||||
class Extractor(ABC, BaseModel):
|
||||
name: ExtractorName
|
||||
binary: Binary
|
||||
|
||||
output_path_func: HandlerFuncStr = 'self.get_output_path'
|
||||
should_extract_func: HandlerFuncStr = 'self.should_extract'
|
||||
extract_func: HandlerFuncStr = 'self.extract'
|
||||
exec_func: HandlerFuncStr = 'self.exec'
|
||||
|
||||
default_args: CmdArgsList = []
|
||||
extra_args: CmdArgsList = []
|
||||
args: Optional[CmdArgsList] = None
|
||||
|
||||
@model_validator(mode='after')
|
||||
def validate_model(self) -> Self:
|
||||
if self.args is None:
|
||||
self.args = [*self.default_args, *self.extra_args]
|
||||
return self
|
||||
|
||||
@field_serializer('binary', when_used='json')
|
||||
def dump_binary(self, binary) -> str:
|
||||
return binary.name
|
||||
|
||||
def get_output_path(self, snapshot) -> Path:
|
||||
return Path(self.name)
|
||||
|
||||
def should_extract(self, snapshot) -> bool:
|
||||
output_dir = self.get_output_path(snapshot)
|
||||
if any(output_dir.glob('*.*')):  # glob() returns a lazy iterator, so check whether it yields at least one file
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||
output_dir = self.get_output_path(url)
|
||||
|
||||
cmd = [url, *self.args] if self.args is not None else [url, *self.default_args, *self.extra_args]
|
||||
proc = self.exec(cmd, pwd=output_dir)
|
||||
|
||||
return {
|
||||
'status': 'succeeded' if proc.returncode == 0 else 'failed',
|
||||
'output': proc.stdout.decode().strip().split('\n')[-1],
|
||||
'output_files': list(output_dir.glob('*.*')),
|
||||
|
||||
'stdout': proc.stdout.decode().strip(),
|
||||
'stderr': proc.stderr.decode().strip(),
|
||||
'returncode': proc.returncode,
|
||||
}
|
||||
|
||||
def exec(self, args: CmdArgsList, pwd: Optional[Path]=None):
|
||||
pwd = pwd or Path('.')
|
||||
assert self.binary.loaded_provider
|
||||
return self.binary.exec(args, pwd=pwd)
|
||||
|
||||
|
||||
class YtdlpExtractor(Extractor):
|
||||
name: ExtractorName = 'media'
|
||||
binary: Binary = YtdlpBinary()
|
||||
|
||||
def get_output_path(self, snapshot) -> Path:
|
||||
return Path(self.name)
|
||||
|
||||
|
||||
class WgetExtractor(Extractor):
|
||||
name: ExtractorName = 'wget'
|
||||
binary: Binary = WgetBinary()
|
||||
|
||||
def get_output_path(self, snapshot) -> Path:
|
||||
return get_wget_output_path(snapshot)
|
||||
|
||||
|
||||
class WarcExtractor(Extractor):
|
||||
name: ExtractorName = 'warc'
|
||||
binary: Binary = WgetBinary()
|
||||
|
||||
def get_output_path(self, snapshot) -> Path:
|
||||
return get_wget_output_path(snapshot)
|
||||
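# usage sketch (assumes a snapshot-like object and a wget binary loadable via its providers; not in the original diff):
#   extractor = WgetExtractor()
#   if extractor.should_extract(snapshot):
#       result = extractor.extract(snapshot.url)   # -> {'status': ..., 'output': ..., 'output_files': [...], ...}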
|
||||
|
|
@ -1,396 +0,0 @@
|
|||
from typing import Dict, Any, List
|
||||
|
||||
import configparser
|
||||
import json
|
||||
import ast
|
||||
|
||||
JSONValue = str | bool | int | None | List['JSONValue']
|
||||
|
||||
def load_ini_value(val: str) -> JSONValue:
|
||||
"""Convert lax INI values into strict TOML-compliant (JSON) values"""
|
||||
if val.lower() in ('true', 'yes', '1'):
|
||||
return True
|
||||
if val.lower() in ('false', 'no', '0'):
|
||||
return False
|
||||
if val.isdigit():
|
||||
return int(val)
|
||||
|
||||
try:
|
||||
return ast.literal_eval(val)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
return json.loads(val)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return val
|
||||
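# examples (sketch): load_ini_value('True') -> True, load_ini_value('60') -> 60,
# load_ini_value("['--silent', '--location']") -> ['--silent', '--location'],
# load_ini_value('None') -> None, and anything unparseable falls through as the raw string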
|
||||
|
||||
def convert(ini_str: str) -> str:
|
||||
"""Convert a string of INI config into its TOML equivalent (warning: strips comments)"""
|
||||
|
||||
config = configparser.ConfigParser()
|
||||
config.optionxform = str  # preserve the case of key names instead of lowercasing them
|
||||
config.read_string(ini_str)
|
||||
|
||||
# Initialize an empty dictionary to store the TOML representation
|
||||
toml_dict = {}
|
||||
|
||||
# Iterate over each section in the INI configuration
|
||||
for section in config.sections():
|
||||
toml_dict[section.upper()] = {}  # keyed by uppercase section name to match the lookup below
|
||||
|
||||
# Iterate over each key-value pair in the section
|
||||
for key, value in config.items(section):
|
||||
parsed_value = load_ini_value(value)
|
||||
|
||||
# Convert the parsed value to its TOML-compatible JSON representation
|
||||
toml_dict[section.upper()][key.upper()] = json.dumps(parsed_value)
|
||||
|
||||
# Build the TOML string
|
||||
toml_str = ""
|
||||
for section, items in toml_dict.items():
|
||||
toml_str += f"[{section}]\n"
|
||||
for key, value in items.items():
|
||||
toml_str += f"{key} = {value}\n"
|
||||
toml_str += "\n"
|
||||
|
||||
return toml_str.strip()
|
||||
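# quick sketch of the conversion (the assertions below exercise the full config):
#   convert('[SERVER_CONFIG]\nDEBUG=False\nTIMEOUT=60')
#   -> '[SERVER_CONFIG]\nDEBUG = false\nTIMEOUT = 60'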
|
||||
|
||||
|
||||
### Basic Assertions
|
||||
|
||||
test_input = """
|
||||
[SERVER_CONFIG]
|
||||
IS_TTY=False
|
||||
USE_COLOR=False
|
||||
SHOW_PROGRESS=False
|
||||
IN_DOCKER=False
|
||||
IN_QEMU=False
|
||||
PUID=501
|
||||
PGID=20
|
||||
OUTPUT_DIR=/opt/archivebox/data
|
||||
CONFIG_FILE=/opt/archivebox/data/ArchiveBox.conf
|
||||
ONLY_NEW=True
|
||||
TIMEOUT=60
|
||||
MEDIA_TIMEOUT=3600
|
||||
OUTPUT_PERMISSIONS=644
|
||||
RESTRICT_FILE_NAMES=windows
|
||||
URL_DENYLIST=\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$
|
||||
URL_ALLOWLIST=None
|
||||
ADMIN_USERNAME=None
|
||||
ADMIN_PASSWORD=None
|
||||
ENFORCE_ATOMIC_WRITES=True
|
||||
TAG_SEPARATOR_PATTERN=[,]
|
||||
SECRET_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
||||
BIND_ADDR=127.0.0.1:8000
|
||||
ALLOWED_HOSTS=*
|
||||
DEBUG=False
|
||||
PUBLIC_INDEX=True
|
||||
PUBLIC_SNAPSHOTS=True
|
||||
PUBLIC_ADD_VIEW=False
|
||||
FOOTER_INFO=Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.
|
||||
SNAPSHOTS_PER_PAGE=40
|
||||
CUSTOM_TEMPLATES_DIR=None
|
||||
TIME_ZONE=UTC
|
||||
TIMEZONE=UTC
|
||||
REVERSE_PROXY_USER_HEADER=Remote-User
|
||||
REVERSE_PROXY_WHITELIST=
|
||||
LOGOUT_REDIRECT_URL=/
|
||||
PREVIEW_ORIGINALS=True
|
||||
LDAP=False
|
||||
LDAP_SERVER_URI=None
|
||||
LDAP_BIND_DN=None
|
||||
LDAP_BIND_PASSWORD=None
|
||||
LDAP_USER_BASE=None
|
||||
LDAP_USER_FILTER=None
|
||||
LDAP_USERNAME_ATTR=None
|
||||
LDAP_FIRSTNAME_ATTR=None
|
||||
LDAP_LASTNAME_ATTR=None
|
||||
LDAP_EMAIL_ATTR=None
|
||||
LDAP_CREATE_SUPERUSER=False
|
||||
SAVE_TITLE=True
|
||||
SAVE_FAVICON=True
|
||||
SAVE_WGET=True
|
||||
SAVE_WGET_REQUISITES=True
|
||||
SAVE_SINGLEFILE=True
|
||||
SAVE_READABILITY=True
|
||||
SAVE_MERCURY=True
|
||||
SAVE_HTMLTOTEXT=True
|
||||
SAVE_PDF=True
|
||||
SAVE_SCREENSHOT=True
|
||||
SAVE_DOM=True
|
||||
SAVE_HEADERS=True
|
||||
SAVE_WARC=True
|
||||
SAVE_GIT=True
|
||||
SAVE_MEDIA=True
|
||||
SAVE_ARCHIVE_DOT_ORG=True
|
||||
RESOLUTION=1440,2000
|
||||
GIT_DOMAINS=github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht
|
||||
CHECK_SSL_VALIDITY=True
|
||||
MEDIA_MAX_SIZE=750m
|
||||
USER_AGENT=None
|
||||
CURL_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)
|
||||
WGET_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5
|
||||
CHROME_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)
|
||||
COOKIES_FILE=None
|
||||
CHROME_USER_DATA_DIR=None
|
||||
CHROME_TIMEOUT=0
|
||||
CHROME_HEADLESS=True
|
||||
CHROME_SANDBOX=True
|
||||
CHROME_EXTRA_ARGS=[]
|
||||
YOUTUBEDL_ARGS=['--restrict-filenames', '--trim-filenames', '128', '--write-description', '--write-info-json', '--write-annotations', '--write-thumbnail', '--no-call-home', '--write-sub', '--write-auto-subs', '--convert-subs=srt', '--yes-playlist', '--continue', '--no-abort-on-error', '--ignore-errors', '--geo-bypass', '--add-metadata', '--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)']
|
||||
YOUTUBEDL_EXTRA_ARGS=[]
|
||||
WGET_ARGS=['--no-verbose', '--adjust-extension', '--convert-links', '--force-directories', '--backup-converted', '--span-hosts', '--no-parent', '-e', 'robots=off']
|
||||
WGET_EXTRA_ARGS=[]
|
||||
CURL_ARGS=['--silent', '--location', '--compressed']
|
||||
CURL_EXTRA_ARGS=[]
|
||||
GIT_ARGS=['--recursive']
|
||||
SINGLEFILE_ARGS=[]
|
||||
SINGLEFILE_EXTRA_ARGS=[]
|
||||
MERCURY_ARGS=['--format=text']
|
||||
MERCURY_EXTRA_ARGS=[]
|
||||
FAVICON_PROVIDER=https://www.google.com/s2/favicons?domain={}
|
||||
USE_INDEXING_BACKEND=True
|
||||
USE_SEARCHING_BACKEND=True
|
||||
SEARCH_BACKEND_ENGINE=ripgrep
|
||||
SEARCH_BACKEND_HOST_NAME=localhost
|
||||
SEARCH_BACKEND_PORT=1491
|
||||
SEARCH_BACKEND_PASSWORD=SecretPassword
|
||||
SEARCH_PROCESS_HTML=True
|
||||
SONIC_COLLECTION=archivebox
|
||||
SONIC_BUCKET=snapshots
|
||||
SEARCH_BACKEND_TIMEOUT=90
|
||||
FTS_SEPARATE_DATABASE=True
|
||||
FTS_TOKENIZERS=porter unicode61 remove_diacritics 2
|
||||
FTS_SQLITE_MAX_LENGTH=1000000000
|
||||
USE_CURL=True
|
||||
USE_WGET=True
|
||||
USE_SINGLEFILE=True
|
||||
USE_READABILITY=True
|
||||
USE_MERCURY=True
|
||||
USE_GIT=True
|
||||
USE_CHROME=True
|
||||
USE_NODE=True
|
||||
USE_YOUTUBEDL=True
|
||||
USE_RIPGREP=True
|
||||
CURL_BINARY=curl
|
||||
GIT_BINARY=git
|
||||
WGET_BINARY=wget
|
||||
SINGLEFILE_BINARY=single-file
|
||||
READABILITY_BINARY=readability-extractor
|
||||
MERCURY_BINARY=postlight-parser
|
||||
YOUTUBEDL_BINARY=yt-dlp
|
||||
NODE_BINARY=node
|
||||
RIPGREP_BINARY=rg
|
||||
CHROME_BINARY=chrome
|
||||
POCKET_CONSUMER_KEY=None
|
||||
USER=squash
|
||||
PACKAGE_DIR=/opt/archivebox/archivebox
|
||||
TEMPLATES_DIR=/opt/archivebox/archivebox/templates
|
||||
ARCHIVE_DIR=/opt/archivebox/data/archive
|
||||
SOURCES_DIR=/opt/archivebox/data/sources
|
||||
LOGS_DIR=/opt/archivebox/data/logs
|
||||
PERSONAS_DIR=/opt/archivebox/data/personas
|
||||
URL_DENYLIST_PTN=re.compile('\\.(css|js|otf|ttf|woff|woff2|gstatic\\.com|googleapis\\.com/css)(\\?.*)?$', re.IGNORECASE|re.MULTILINE)
|
||||
URL_ALLOWLIST_PTN=None
|
||||
DIR_OUTPUT_PERMISSIONS=755
|
||||
ARCHIVEBOX_BINARY=/opt/archivebox/.venv/bin/archivebox
|
||||
VERSION=0.8.0
|
||||
COMMIT_HASH=102e87578c6036bb0132dd1ebd17f8f05ffc880f
|
||||
BUILD_TIME=2024-05-15 03:28:05 1715768885
|
||||
VERSIONS_AVAILABLE=None
|
||||
CAN_UPGRADE=False
|
||||
PYTHON_BINARY=/opt/archivebox/.venv/bin/python3.10
|
||||
PYTHON_ENCODING=UTF-8
|
||||
PYTHON_VERSION=3.10.14
|
||||
DJANGO_BINARY=/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py
|
||||
DJANGO_VERSION=5.0.6 final (0)
|
||||
SQLITE_BINARY=/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py
|
||||
SQLITE_VERSION=2.6.0
|
||||
CURL_VERSION=curl 8.4.0 (x86_64-apple-darwin23.0)
|
||||
WGET_VERSION=GNU Wget 1.24.5
|
||||
WGET_AUTO_COMPRESSION=True
|
||||
RIPGREP_VERSION=ripgrep 14.1.0
|
||||
SINGLEFILE_VERSION=None
|
||||
READABILITY_VERSION=None
|
||||
MERCURY_VERSION=None
|
||||
GIT_VERSION=git version 2.44.0
|
||||
YOUTUBEDL_VERSION=2024.04.09
|
||||
CHROME_VERSION=Google Chrome 124.0.6367.207
|
||||
NODE_VERSION=v21.7.3
|
||||
"""
|
||||
|
||||
|
||||
expected_output = '''[SERVER_CONFIG]
|
||||
IS_TTY = false
|
||||
USE_COLOR = false
|
||||
SHOW_PROGRESS = false
|
||||
IN_DOCKER = false
|
||||
IN_QEMU = false
|
||||
PUID = 501
|
||||
PGID = 20
|
||||
OUTPUT_DIR = "/opt/archivebox/data"
|
||||
CONFIG_FILE = "/opt/archivebox/data/ArchiveBox.conf"
|
||||
ONLY_NEW = true
|
||||
TIMEOUT = 60
|
||||
MEDIA_TIMEOUT = 3600
|
||||
OUTPUT_PERMISSIONS = 644
|
||||
RESTRICT_FILE_NAMES = "windows"
|
||||
URL_DENYLIST = "\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$"
|
||||
URL_ALLOWLIST = null
|
||||
ADMIN_USERNAME = null
|
||||
ADMIN_PASSWORD = null
|
||||
ENFORCE_ATOMIC_WRITES = true
|
||||
TAG_SEPARATOR_PATTERN = "[,]"
|
||||
SECRET_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
|
||||
BIND_ADDR = "127.0.0.1:8000"
|
||||
ALLOWED_HOSTS = "*"
|
||||
DEBUG = false
|
||||
PUBLIC_INDEX = true
|
||||
PUBLIC_SNAPSHOTS = true
|
||||
PUBLIC_ADD_VIEW = false
|
||||
FOOTER_INFO = "Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests."
|
||||
SNAPSHOTS_PER_PAGE = 40
|
||||
CUSTOM_TEMPLATES_DIR = null
|
||||
TIME_ZONE = "UTC"
|
||||
TIMEZONE = "UTC"
|
||||
REVERSE_PROXY_USER_HEADER = "Remote-User"
|
||||
REVERSE_PROXY_WHITELIST = ""
|
||||
LOGOUT_REDIRECT_URL = "/"
|
||||
PREVIEW_ORIGINALS = true
|
||||
LDAP = false
|
||||
LDAP_SERVER_URI = null
|
||||
LDAP_BIND_DN = null
|
||||
LDAP_BIND_PASSWORD = null
|
||||
LDAP_USER_BASE = null
|
||||
LDAP_USER_FILTER = null
|
||||
LDAP_USERNAME_ATTR = null
|
||||
LDAP_FIRSTNAME_ATTR = null
|
||||
LDAP_LASTNAME_ATTR = null
|
||||
LDAP_EMAIL_ATTR = null
|
||||
LDAP_CREATE_SUPERUSER = false
|
||||
SAVE_TITLE = true
|
||||
SAVE_FAVICON = true
|
||||
SAVE_WGET = true
|
||||
SAVE_WGET_REQUISITES = true
|
||||
SAVE_SINGLEFILE = true
|
||||
SAVE_READABILITY = true
|
||||
SAVE_MERCURY = true
|
||||
SAVE_HTMLTOTEXT = true
|
||||
SAVE_PDF = true
|
||||
SAVE_SCREENSHOT = true
|
||||
SAVE_DOM = true
|
||||
SAVE_HEADERS = true
|
||||
SAVE_WARC = true
|
||||
SAVE_GIT = true
|
||||
SAVE_MEDIA = true
|
||||
SAVE_ARCHIVE_DOT_ORG = true
|
||||
RESOLUTION = [1440, 2000]
|
||||
GIT_DOMAINS = "github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht"
|
||||
CHECK_SSL_VALIDITY = true
|
||||
MEDIA_MAX_SIZE = "750m"
|
||||
USER_AGENT = null
|
||||
CURL_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)"
|
||||
WGET_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5"
|
||||
CHROME_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)"
|
||||
COOKIES_FILE = null
|
||||
CHROME_USER_DATA_DIR = null
|
||||
CHROME_TIMEOUT = false
|
||||
CHROME_HEADLESS = true
|
||||
CHROME_SANDBOX = true
|
||||
CHROME_EXTRA_ARGS = []
|
||||
YOUTUBEDL_ARGS = ["--restrict-filenames", "--trim-filenames", "128", "--write-description", "--write-info-json", "--write-annotations", "--write-thumbnail", "--no-call-home", "--write-sub", "--write-auto-subs", "--convert-subs=srt", "--yes-playlist", "--continue", "--no-abort-on-error", "--ignore-errors", "--geo-bypass", "--add-metadata", "--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)"]
|
||||
YOUTUBEDL_EXTRA_ARGS = []
|
||||
WGET_ARGS = ["--no-verbose", "--adjust-extension", "--convert-links", "--force-directories", "--backup-converted", "--span-hosts", "--no-parent", "-e", "robots=off"]
|
||||
WGET_EXTRA_ARGS = []
|
||||
CURL_ARGS = ["--silent", "--location", "--compressed"]
|
||||
CURL_EXTRA_ARGS = []
|
||||
GIT_ARGS = ["--recursive"]
|
||||
SINGLEFILE_ARGS = []
|
||||
SINGLEFILE_EXTRA_ARGS = []
|
||||
MERCURY_ARGS = ["--format=text"]
|
||||
MERCURY_EXTRA_ARGS = []
|
||||
FAVICON_PROVIDER = "https://www.google.com/s2/favicons?domain={}"
|
||||
USE_INDEXING_BACKEND = true
|
||||
USE_SEARCHING_BACKEND = true
|
||||
SEARCH_BACKEND_ENGINE = "ripgrep"
|
||||
SEARCH_BACKEND_HOST_NAME = "localhost"
|
||||
SEARCH_BACKEND_PORT = 1491
|
||||
SEARCH_BACKEND_PASSWORD = "SecretPassword"
|
||||
SEARCH_PROCESS_HTML = true
|
||||
SONIC_COLLECTION = "archivebox"
|
||||
SONIC_BUCKET = "snapshots"
|
||||
SEARCH_BACKEND_TIMEOUT = 90
|
||||
FTS_SEPARATE_DATABASE = true
|
||||
FTS_TOKENIZERS = "porter unicode61 remove_diacritics 2"
|
||||
FTS_SQLITE_MAX_LENGTH = 1000000000
|
||||
USE_CURL = true
|
||||
USE_WGET = true
|
||||
USE_SINGLEFILE = true
|
||||
USE_READABILITY = true
|
||||
USE_MERCURY = true
|
||||
USE_GIT = true
|
||||
USE_CHROME = true
|
||||
USE_NODE = true
|
||||
USE_YOUTUBEDL = true
|
||||
USE_RIPGREP = true
|
||||
CURL_BINARY = "curl"
|
||||
GIT_BINARY = "git"
|
||||
WGET_BINARY = "wget"
|
||||
SINGLEFILE_BINARY = "single-file"
|
||||
READABILITY_BINARY = "readability-extractor"
|
||||
MERCURY_BINARY = "postlight-parser"
|
||||
YOUTUBEDL_BINARY = "yt-dlp"
|
||||
NODE_BINARY = "node"
|
||||
RIPGREP_BINARY = "rg"
|
||||
CHROME_BINARY = "chrome"
|
||||
POCKET_CONSUMER_KEY = null
|
||||
USER = "squash"
|
||||
PACKAGE_DIR = "/opt/archivebox/archivebox"
|
||||
TEMPLATES_DIR = "/opt/archivebox/archivebox/templates"
|
||||
ARCHIVE_DIR = "/opt/archivebox/data/archive"
|
||||
SOURCES_DIR = "/opt/archivebox/data/sources"
|
||||
LOGS_DIR = "/opt/archivebox/data/logs"
|
||||
PERSONAS_DIR = "/opt/archivebox/data/personas"
|
||||
URL_DENYLIST_PTN = "re.compile(\'\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$\', re.IGNORECASE|re.MULTILINE)"
|
||||
URL_ALLOWLIST_PTN = null
|
||||
DIR_OUTPUT_PERMISSIONS = 755
|
||||
ARCHIVEBOX_BINARY = "/opt/archivebox/.venv/bin/archivebox"
|
||||
VERSION = "0.8.0"
|
||||
COMMIT_HASH = "102e87578c6036bb0132dd1ebd17f8f05ffc880f"
|
||||
BUILD_TIME = "2024-05-15 03:28:05 1715768885"
|
||||
VERSIONS_AVAILABLE = null
|
||||
CAN_UPGRADE = false
|
||||
PYTHON_BINARY = "/opt/archivebox/.venv/bin/python3.10"
|
||||
PYTHON_ENCODING = "UTF-8"
|
||||
PYTHON_VERSION = "3.10.14"
|
||||
DJANGO_BINARY = "/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py"
|
||||
DJANGO_VERSION = "5.0.6 final (0)"
|
||||
SQLITE_BINARY = "/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py"
|
||||
SQLITE_VERSION = "2.6.0"
|
||||
CURL_VERSION = "curl 8.4.0 (x86_64-apple-darwin23.0)"
|
||||
WGET_VERSION = "GNU Wget 1.24.5"
|
||||
WGET_AUTO_COMPRESSION = true
|
||||
RIPGREP_VERSION = "ripgrep 14.1.0"
|
||||
SINGLEFILE_VERSION = null
|
||||
READABILITY_VERSION = null
|
||||
MERCURY_VERSION = null
|
||||
GIT_VERSION = "git version 2.44.0"
|
||||
YOUTUBEDL_VERSION = "2024.04.09"
|
||||
CHROME_VERSION = "Google Chrome 124.0.6367.207"
|
||||
NODE_VERSION = "v21.7.3"'''
|
||||
|
||||
|
||||
first_output = convert(test_input) # make sure ini -> toml parses correctly
|
||||
second_output = convert(first_output) # make sure toml -> toml parses/dumps consistently
|
||||
assert first_output == second_output == expected_output # make sure parsing is idempotent
|
||||
|
||||
# # DEBUGGING
|
||||
# import sys
|
||||
# import difflib
|
||||
# sys.stdout.writelines(difflib.context_diff(first_output, second_output, fromfile='first', tofile='second'))
|
||||
# print(repr(second_output))
|
|
@ -1,38 +0,0 @@
|
|||
# Generated by Django 5.0.6 on 2024-05-18 00:16
|
||||
|
||||
import abid_utils.models
|
||||
import archivebox.plugantic.plugins
|
||||
import charidfield.fields
|
||||
import django.core.serializers.json
|
||||
import django.db.models.deletion
|
||||
import django_pydantic_field.fields
|
||||
import uuid
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
initial = True
|
||||
|
||||
dependencies = [
|
||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name='Plugin',
|
||||
fields=[
|
||||
('created', models.DateTimeField(auto_now_add=True)),
|
||||
('modified', models.DateTimeField(auto_now=True)),
|
||||
('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
|
||||
('uuid', models.UUIDField(blank=True, null=True, unique=True)),
|
||||
('abid', charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='plg_', unique=True)),
|
||||
('schema', django_pydantic_field.fields.PydanticSchemaField(config=None, encoder=django.core.serializers.json.DjangoJSONEncoder, schema=archivebox.plugantic.plugins.Plugin)),
|
||||
('created_by', models.ForeignKey(default=abid_utils.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
|
||||
],
|
||||
options={
|
||||
'abstract': False,
|
||||
},
|
||||
),
|
||||
]
|
|
@ -1,21 +0,0 @@
|
|||
# Generated by Django 5.0.6 on 2024-05-18 01:16
|
||||
|
||||
import archivebox.plugantic.plugins
|
||||
import django.core.serializers.json
|
||||
import django_pydantic_field.fields
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('plugantic', '0001_initial'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='plugin',
|
||||
name='schema',
|
||||
field=django_pydantic_field.fields.PydanticSchemaField(config=None, default=None, encoder=django.core.serializers.json.DjangoJSONEncoder, schema=archivebox.plugantic.plugins.Plugin),
|
||||
),
|
||||
]
|
|
@ -1,21 +0,0 @@
|
|||
# Generated by Django 5.0.6 on 2024-05-18 01:25
|
||||
|
||||
import archivebox.plugantic.replayers
|
||||
import django.core.serializers.json
|
||||
import django_pydantic_field.fields
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('plugantic', '0002_alter_plugin_schema'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='plugin',
|
||||
name='schema',
|
||||
field=django_pydantic_field.fields.PydanticSchemaField(config=None, default={'embed_template': 'plugins/generic_replayer/templates/embed.html', 'fullpage_template': 'plugins/generic_replayer/templates/fullpage.html', 'name': 'GenericReplayer', 'row_template': 'plugins/generic_replayer/templates/row.html', 'url_pattern': '*'}, encoder=django.core.serializers.json.DjangoJSONEncoder, schema=archivebox.plugantic.replayers.Replayer),
|
||||
),
|
||||
]
|
|
@ -1,32 +0,0 @@
|
|||
# Generated by Django 5.0.6 on 2024-05-18 01:28
|
||||
|
||||
import archivebox.plugantic.configs
|
||||
import django.core.serializers.json
|
||||
import django_pydantic_field.compat.django
|
||||
import django_pydantic_field.fields
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('plugantic', '0003_alter_plugin_schema'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RemoveField(
|
||||
model_name='plugin',
|
||||
name='schema',
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='plugin',
|
||||
name='configs',
|
||||
field=django_pydantic_field.fields.PydanticSchemaField(config=None, default=[], encoder=django.core.serializers.json.DjangoJSONEncoder, schema=django_pydantic_field.compat.django.GenericContainer(list, (archivebox.plugantic.configs.ConfigSet,))),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='plugin',
|
||||
name='name',
|
||||
field=models.CharField(default='name', max_length=64, unique=True),
|
||||
preserve_default=False,
|
||||
),
|
||||
]
|
|
@ -1,39 +0,0 @@
|
|||
# Generated by Django 5.0.6 on 2024-05-18 01:42
|
||||
|
||||
import abid_utils.models
|
||||
import charidfield.fields
|
||||
import django.db.models.deletion
|
||||
import pathlib
|
||||
import uuid
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('plugantic', '0004_remove_plugin_schema_plugin_configs_plugin_name'),
|
||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name='CustomPlugin',
|
||||
fields=[
|
||||
('created', models.DateTimeField(auto_now_add=True)),
|
||||
('modified', models.DateTimeField(auto_now=True)),
|
||||
('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
|
||||
('uuid', models.UUIDField(blank=True, null=True, unique=True)),
|
||||
('abid', charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='plg_', unique=True)),
|
||||
('name', models.CharField(max_length=64, unique=True)),
|
||||
('path', models.FilePathField(path=pathlib.PurePosixPath('/Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox/archivebox/plugins'))),
|
||||
('created_by', models.ForeignKey(default=abid_utils.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
|
||||
],
|
||||
options={
|
||||
'abstract': False,
|
||||
},
|
||||
),
|
||||
migrations.DeleteModel(
|
||||
name='Plugin',
|
||||
),
|
||||
]
|
|
@ -1,19 +0,0 @@
|
|||
# Generated by Django 5.0.6 on 2024-05-18 01:45
|
||||
|
||||
import pathlib
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('plugantic', '0005_customplugin_delete_plugin'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='customplugin',
|
||||
name='path',
|
||||
field=models.FilePathField(allow_files=False, allow_folders=True, path=pathlib.PurePosixPath('/Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox/archivebox/plugins'), recursive=True),
|
||||
),
|
||||
]
|
|
@ -1,19 +0,0 @@
|
|||
# Generated by Django 5.0.6 on 2024-05-18 01:46
|
||||
|
||||
import pathlib
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('plugantic', '0006_alter_customplugin_path'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='customplugin',
|
||||
name='path',
|
||||
field=models.FilePathField(allow_files=False, allow_folders=True, path=pathlib.PurePosixPath('/Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox/data/plugins'), recursive=True),
|
||||
),
|
||||
]
|
|
@ -1,19 +0,0 @@
|
|||
# Generated by Django 5.0.6 on 2024-05-18 01:47
|
||||
|
||||
import pathlib
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('plugantic', '0007_alter_customplugin_path'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='customplugin',
|
||||
name='path',
|
||||
field=models.FilePathField(allow_files=False, allow_folders=True, path=pathlib.PurePosixPath('/Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox/data'), recursive=True),
|
||||
),
|
||||
]
|
|
@ -1,18 +0,0 @@
|
|||
# Generated by Django 5.0.6 on 2024-05-18 01:48
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('plugantic', '0008_alter_customplugin_path'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='customplugin',
|
||||
name='path',
|
||||
field=models.FilePathField(allow_files=False, allow_folders=True, path='/Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox/data', recursive=True),
|
||||
),
|
||||
]
|