
Merge branch 'dev' into issue1316

Nick Sweeting, 2024-05-06 23:14:16 -07:00 (committed by GitHub)
commit ef856e8051
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
50 changed files with 1469 additions and 1694 deletions


@@ -17,6 +17,11 @@ venv/
.venv-old/
.docker-venv/
node_modules/
chrome/
chromeprofile/
pdm.dev.lock
pdm.lock
docs/
build/

.github/FUNDING.yml (vendored, 5 lines changed)

@@ -1,3 +1,2 @@
github: pirate
patreon: theSquashSH
custom: ["https://hcb.hackclub.com/donations/start/archivebox", "https://paypal.me/NicholasSweeting"]
github: ["ArchiveBox", "pirate"]
custom: ["https://donate.archivebox.io", "https://paypal.me/NicholasSweeting"]


@@ -11,7 +11,7 @@ on:
env:
DOCKER_IMAGE: archivebox-ci
jobs:
buildx:
runs-on: ubuntu-latest
@@ -24,21 +24,21 @@ jobs:
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v3
with:
version: latest
install: true
platforms: linux/amd64,linux/arm64,linux/arm/v7
platforms: linux/amd64,linux/arm64
- name: Builder instance name
run: echo ${{ steps.buildx.outputs.name }}
- name: Available platforms
run: echo ${{ steps.buildx.outputs.platforms }}
- name: Cache Docker layers
uses: actions/cache@v3
with:
@@ -51,21 +51,27 @@ jobs:
uses: docker/login-action@v3
if: github.event_name != 'pull_request'
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Collect Docker tags
# https://github.com/docker/metadata-action
id: docker_meta
uses: docker/metadata-action@v5
with:
images: archivebox/archivebox,nikisweeting/archivebox
tags: |
# :stable
type=ref,event=branch
# :0.7.3
type=semver,pattern={{version}}
# :0.7
type=semver,pattern={{major}}.{{minor}}
# :sha-463ea54
type=sha
type=raw,value=latest,enable={{is_default_branch}}
# :latest
type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'stable') }}
- name: Build and push
id: docker_build
uses: docker/build-push-action@v5
@@ -77,7 +83,7 @@ jobs:
tags: ${{ steps.docker_meta.outputs.tags }}
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache-new
platforms: linux/amd64,linux/arm64,linux/arm/v7
platforms: linux/amd64,linux/arm64
- name: Image digest
run: echo ${{ steps.docker_build.outputs.digest }}
@@ -88,7 +94,7 @@
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
repository: archivebox/archivebox
# This ugly bit is necessary if you don't want your cache to grow forever
# until it hits GitHub's limit of 5GB.
# Temp fix

.gitignore (vendored, 6 lines changed)

@@ -13,8 +13,9 @@ venv/
node_modules/
# Ignore dev lockfiles (should always be built fresh)
requirements-dev.txt
pdm.lock
pdm.dev.lock
requirements-dev.txt
# Packaging artifacts
.pdm-python
@@ -26,9 +27,6 @@ dist/
# Data folders
data/
data1/
data2/
data3/
data*/
output/

Dockerfile

@@ -37,7 +37,7 @@ LABEL name="archivebox" \
com.docker.extension.detailed-description='See here for detailed documentation: https://wiki.archivebox.io' \
com.docker.extension.changelog='See here for release notes: https://github.com/ArchiveBox/ArchiveBox/releases' \
com.docker.extension.categories='database,utility-tools'
ARG TARGETPLATFORM
ARG TARGETOS
ARG TARGETARCH
@@ -87,7 +87,9 @@ COPY --chown=root:root --chmod=755 package.json "$CODE_DIR/"
RUN grep '"version": ' "${CODE_DIR}/package.json" | awk -F'"' '{print $4}' > /VERSION.txt
# Force apt to leave downloaded binaries in /var/cache/apt (massively speeds up Docker builds)
RUN echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache \
RUN echo 'Binary::apt::APT::Keep-Downloaded-Packages "1";' > /etc/apt/apt.conf.d/99keep-cache \
&& echo 'APT::Install-Recommends "0";' > /etc/apt/apt.conf.d/99no-install-recommends \
&& echo 'APT::Install-Suggests "0";' > /etc/apt/apt.conf.d/99no-install-suggests \
&& rm -f /etc/apt/apt.conf.d/docker-clean
# Print debug info about build and save it to disk, for human eyes only, not used by anything else
@@ -120,10 +122,10 @@ RUN echo "[*] Setting up $ARCHIVEBOX_USER user uid=${DEFAULT_PUID}..." \
# Install system apt dependencies (adding backports to access more recent apt updates)
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT \
echo "[+] Installing APT base system dependencies for $TARGETPLATFORM..." \
&& echo 'deb https://deb.debian.org/debian bookworm-backports main contrib non-free' >> /etc/apt/sources.list.d/backports.list \
&& echo 'deb https://deb.debian.org/debian bookworm-backports main contrib non-free' > /etc/apt/sources.list.d/backports.list \
&& mkdir -p /etc/apt/keyrings \
&& apt-get update -qq \
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
&& apt-get install -qq -y -t bookworm-backports \
# 1. packaging dependencies
apt-transport-https ca-certificates apt-utils gnupg2 curl wget \
# 2. docker and init system dependencies
@@ -134,27 +136,13 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
######### Language Environments ####################################
# Install Node environment
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.npm,sharing=locked,id=npm-$TARGETARCH$TARGETVARIANT \
echo "[+] Installing Node $NODE_VERSION environment in $NODE_MODULES..." \
&& echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_VERSION}.x nodistro main" >> /etc/apt/sources.list.d/nodejs.list \
&& curl -fsSL "https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key" | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
&& apt-get update -qq \
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
nodejs libatomic1 python3-minimal \
&& rm -rf /var/lib/apt/lists/* \
# Update NPM to latest version
&& npm i -g npm --cache /root/.npm \
# Save version info
&& ( \
which node && node --version \
&& which npm && npm --version \
&& echo -e '\n\n' \
) | tee -a /VERSION.txt
# Install Python environment
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
echo "[+] Setting up Python $PYTHON_VERSION runtime..." \
# && apt-get update -qq \
# && apt-get install -qq -y -t bookworm-backports --no-upgrade \
# python${PYTHON_VERSION} python${PYTHON_VERSION}-minimal python3-pip \
# && rm -rf /var/lib/apt/lists/* \
# tell PDM to allow using global system python site packages
# && rm /usr/lib/python3*/EXTERNALLY-MANAGED \
# create global virtual environment GLOBAL_VENV to use (better than using pip install --global)
@@ -171,13 +159,34 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
&& echo -e '\n\n' \
) | tee -a /VERSION.txt
# Install Node environment
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.npm,sharing=locked,id=npm-$TARGETARCH$TARGETVARIANT \
echo "[+] Installing Node $NODE_VERSION environment in $NODE_MODULES..." \
&& echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_VERSION}.x nodistro main" >> /etc/apt/sources.list.d/nodejs.list \
&& curl -fsSL "https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key" | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
&& apt-get update -qq \
&& apt-get install -qq -y -t bookworm-backports --no-upgrade libatomic1 \
&& apt-get install -y -t bookworm-backports --no-upgrade \
nodejs \
&& rm -rf /var/lib/apt/lists/* \
# Update NPM to latest version
&& npm i -g npm --cache /root/.npm \
# Save version info
&& ( \
which node && node --version \
&& which npm && npm --version \
&& echo -e '\n\n' \
) | tee -a /VERSION.txt
######### Extractor Dependencies ##################################
# Install apt dependencies
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
echo "[+] Installing APT extractor dependencies globally using apt..." \
&& apt-get update -qq \
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
&& apt-get install -qq -y -t bookworm-backports \
curl wget git yt-dlp ffmpeg ripgrep \
# Packages we have also needed in the past:
# youtube-dl wget2 aria2 python3-pyxattr rtmpdump libfribidi-bin mpv \
@@ -196,25 +205,21 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/ms-playwright,sharing=locked,id=browsers-$TARGETARCH$TARGETVARIANT \
echo "[+] Installing Browser binary dependencies to $PLAYWRIGHT_BROWSERS_PATH..." \
&& apt-get update -qq \
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
&& apt-get install -qq -y -t bookworm-backports \
fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-khmeros fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
at-spi2-common fonts-liberation fonts-noto-color-emoji fonts-tlwg-loma-otf fonts-unifont libatk-bridge2.0-0 libatk1.0-0 libatspi2.0-0 libavahi-client3 \
libavahi-common-data libavahi-common3 libcups2 libfontenc1 libice6 libnspr4 libnss3 libsm6 libunwind8 \
libxaw7 libxcomposite1 libxdamage1 libxfont2 \
libxkbfile1 libxmu6 libxpm4 libxt6 x11-xkb-utils xfonts-encodings \
# xfonts-scalable xfonts-utils xserver-common xvfb \
# chrome can run without dbus/upower technically, it complains about missing dbus but should run ok anyway
# libxss1 dbus dbus-x11 upower \
# && service dbus start \
&& if [[ "$TARGETPLATFORM" == *amd64* || "$TARGETPLATFORM" == *arm64* ]]; then \
# install Chromium using playwright
pip install playwright \
&& cp -r /root/.cache/ms-playwright "$PLAYWRIGHT_BROWSERS_PATH" \
&& playwright install --with-deps chromium \
&& export CHROME_BINARY="$(python -c 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)')"; \
else \
# fall back to installing Chromium via apt-get on platforms not supported by playwright (e.g. risc, ARMv7, etc.)
# apt-get install -qq -y -t bookworm-backports --no-install-recommends \
# chromium \
# && export CHROME_BINARY="$(which chromium)"; \
echo 'armv7 no longer supported in versions after v0.7.3' \
&& exit 1; \
fi \
# install Chromium using playwright
&& pip install playwright \
&& cp -r /root/.cache/ms-playwright "$PLAYWRIGHT_BROWSERS_PATH" \
&& playwright install chromium \
&& export CHROME_BINARY="$(python -c 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)')" \
&& rm -rf /var/lib/apt/lists/* \
&& ln -s "$CHROME_BINARY" /usr/bin/chromium-browser \
&& mkdir -p "/home/${ARCHIVEBOX_USER}/.config/chromium/Crash Reports/pending/" \
@@ -247,8 +252,8 @@ COPY --chown=root:root --chmod=755 "./pyproject.toml" "requirements.txt" "$CODE_
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
echo "[+] Installing PIP ArchiveBox dependencies from requirements.txt for ${TARGETPLATFORM}..." \
&& apt-get update -qq \
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
build-essential \
&& apt-get install -qq -y -t bookworm-backports \
# build-essential \
libssl-dev libldap2-dev libsasl2-dev \
python3-ldap python3-msgpack python3-mutagen python3-regex python3-pycryptodome procps \
# && ln -s "$GLOBAL_VENV" "$APP_VENV" \
@@ -258,8 +263,8 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
# && pdm export -o requirements.txt --without-hashes \
# && source $GLOBAL_VENV/bin/activate \
&& pip install -r requirements.txt \
&& apt-get purge -y \
build-essential \
# && apt-get purge -y \
# build-essential \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
@@ -269,7 +274,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
echo "[*] Installing PIP ArchiveBox package from $CODE_DIR..." \
# && apt-get update -qq \
# install C compiler to build deps on platforms that dont have 32-bit wheels available on pypi
# && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
# && apt-get install -qq -y -t bookworm-backports \
# build-essential \
# INSTALL ARCHIVEBOX python package globally from CODE_DIR, with all optional dependencies
&& pip install -e "$CODE_DIR"[sonic,ldap] \

README.md

@@ -407,7 +407,7 @@ See <a href="#%EF%B8%8F-cli-usage">below</a> for usage examples using the CLI, W
> *Warning: These are contributed by external volunteers and may lag behind the official `pip` channel.*
<ul>
<li>TrueNAS: <a href="https://truecharts.org/charts/incubator/archivebox/">Official ArchiveBox TrueChart</a> / <a href="https://dev.to/finloop/setting-up-archivebox-on-truenas-scale-1788">Custom App Guide</a></li>
<li>TrueNAS: <a href="https://truecharts.org/charts/stable/archivebox/">Official ArchiveBox TrueChart</a> / <a href="https://dev.to/finloop/setting-up-archivebox-on-truenas-scale-1788">Custom App Guide</a></li>
<li><a href="https://unraid.net/community/apps?q=archivebox#r">UnRaid</a></li>
<li><a href="https://github.com/YunoHost-Apps/archivebox_ynh">Yunohost</a></li>
<li><a href="https://www.cloudron.io/store/io.archivebox.cloudronapp.html">Cloudron</a></li>
@@ -445,6 +445,9 @@ Other providers of paid ArchiveBox hosting (not officially endorsed):<br/>
<li><a href="https://fly.io/">
<img src="https://img.shields.io/badge/Unmanaged_App-Fly.io-%239a2de6.svg?style=flat" height="22px"/>
</a> (USD $10-50+/mo, <a href="https://fly.io/docs/hands-on/start/">instructions</a>)</li>
<li><a href="https://railway.app/template/2Vvhmy">
<img src="https://img.shields.io/badge/Unmanaged_App-Railway-%23A11BE6.svg?style=flat" height="22px"/>
</a> (USD $0-5+/mo)</li>
<li><a href="https://aws.amazon.com/marketplace/pp/Linnovate-Open-Source-Innovation-Support-For-Archi/B08RVW6MJ2"><img src="https://img.shields.io/badge/Unmanaged_VPS-AWS-%23ee8135.svg?style=flat" height="22px"/></a> (USD $60-200+/mo)</li>
<li><a href="https://azuremarketplace.microsoft.com/en-us/marketplace/apps/meanio.archivebox?ocid=gtmrewards_whatsnewblog_archivebox_vol118"><img src="https://img.shields.io/badge/Unmanaged_VPS-Azure-%237cb300.svg?style=flat" height="22px"/></a> (USD $60-200+/mo)</li>
<br/>

archivebox/__init__.py

@@ -1 +1,7 @@
__package__ = 'archivebox'
# monkey patch django timezone to add back utc (it was removed in Django 5.0)
import datetime
from django.utils import timezone
timezone.utc = datetime.timezone.utc
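For context, Django 5.0 removed the timezone.utc alias, so any dependency that still references django.utils.timezone.utc raises AttributeError until this shim runs. A minimal sketch of the call pattern it preserves (the caller below is hypothetical):

import datetime
from django.utils import timezone  # Django >= 5.0 no longer defines timezone.utc itself

created_at = datetime.datetime.now(timezone.utc)  # works again thanks to the monkey patch above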

archivebox/api/__init__.py

@@ -0,0 +1 @@
__package__ = 'archivebox.api'

archivebox/api/apps.py

@@ -1,3 +1,5 @@
__package__ = 'archivebox.api'
from django.apps import AppConfig

archivebox/api/archive.py (deleted)

@@ -1,184 +0,0 @@
# archivebox_api.py
from typing import List, Optional
from enum import Enum
from pydantic import BaseModel
from ninja import Router
from main import (
add,
remove,
update,
list_all,
ONLY_NEW,
) # Assuming these functions are defined in main.py
# Schemas
class StatusChoices(str, Enum):
indexed = 'indexed'
archived = 'archived'
unarchived = 'unarchived'
present = 'present'
valid = 'valid'
invalid = 'invalid'
duplicate = 'duplicate'
orphaned = 'orphaned'
corrupted = 'corrupted'
unrecognized = 'unrecognized'
class AddURLSchema(BaseModel):
urls: List[str]
tag: str = ""
depth: int = 0
update: bool = not ONLY_NEW # Default to the opposite of ONLY_NEW
update_all: bool = False
index_only: bool = False
overwrite: bool = False
init: bool = False
extractors: str = ""
parser: str = "auto"
class RemoveURLSchema(BaseModel):
yes: bool = False
delete: bool = False
before: Optional[float] = None
after: Optional[float] = None
filter_type: str = "exact"
filter_patterns: Optional[List[str]] = None
class UpdateSchema(BaseModel):
resume: Optional[float] = None
only_new: Optional[bool] = None
index_only: Optional[bool] = False
overwrite: Optional[bool] = False
before: Optional[float] = None
after: Optional[float] = None
status: Optional[StatusChoices] = None
filter_type: Optional[str] = 'exact'
filter_patterns: Optional[List[str]] = None
extractors: Optional[str] = ""
class ListAllSchema(BaseModel):
filter_patterns: Optional[List[str]] = None
filter_type: str = 'exact'
status: Optional[StatusChoices] = None
after: Optional[float] = None
before: Optional[float] = None
sort: Optional[str] = None
csv: Optional[str] = None
json: bool = False
html: bool = False
with_headers: bool = False
# API Router
router = Router()
@router.post("/add", response={200: dict})
def api_add(request, payload: AddURLSchema):
try:
result = add(
urls=payload.urls,
tag=payload.tag,
depth=payload.depth,
update=payload.update,
update_all=payload.update_all,
index_only=payload.index_only,
overwrite=payload.overwrite,
init=payload.init,
extractors=payload.extractors,
parser=payload.parser,
)
# Currently the add function returns a list of ALL items in the DB, ideally only return new items
return {
"status": "success",
"message": "URLs added successfully.",
"result": str(result),
}
except Exception as e:
# Handle exceptions raised by the add function or during processing
return {"status": "error", "message": str(e)}
@router.post("/remove", response={200: dict})
def api_remove(request, payload: RemoveURLSchema):
try:
result = remove(
yes=payload.yes,
delete=payload.delete,
before=payload.before,
after=payload.after,
filter_type=payload.filter_type,
filter_patterns=payload.filter_patterns,
)
return {
"status": "success",
"message": "URLs removed successfully.",
"result": result,
}
except Exception as e:
# Handle exceptions raised by the remove function or during processing
return {"status": "error", "message": str(e)}
@router.post("/update", response={200: dict})
def api_update(request, payload: UpdateSchema):
try:
result = update(
resume=payload.resume,
only_new=payload.only_new,
index_only=payload.index_only,
overwrite=payload.overwrite,
before=payload.before,
after=payload.after,
status=payload.status,
filter_type=payload.filter_type,
filter_patterns=payload.filter_patterns,
extractors=payload.extractors,
)
return {
"status": "success",
"message": "Archive updated successfully.",
"result": result,
}
except Exception as e:
# Handle exceptions raised by the update function or during processing
return {"status": "error", "message": str(e)}
@router.post("/list_all", response={200: dict})
def api_list_all(request, payload: ListAllSchema):
try:
result = list_all(
filter_patterns=payload.filter_patterns,
filter_type=payload.filter_type,
status=payload.status,
after=payload.after,
before=payload.before,
sort=payload.sort,
csv=payload.csv,
json=payload.json,
html=payload.html,
with_headers=payload.with_headers,
)
# TODO: This is kind of bad, make the format a choice field
if payload.json:
return {"status": "success", "format": "json", "data": result}
elif payload.html:
return {"status": "success", "format": "html", "data": result}
elif payload.csv:
return {"status": "success", "format": "csv", "data": result}
else:
return {
"status": "success",
"message": "List generated successfully.",
"data": result,
}
except Exception as e:
# Handle exceptions raised by the list_all function or during processing
return {"status": "error", "message": str(e)}

archivebox/api/auth.py

@@ -1,48 +1,107 @@
__package__ = 'archivebox.api'
from typing import Optional
from django.http import HttpRequest
from django.contrib.auth import login
from django.contrib.auth import authenticate
from ninja import Form, Router, Schema
from ninja.security import HttpBearer
from django.contrib.auth.models import AbstractBaseUser
from api.models import Token
router = Router()
from ninja.security import HttpBearer, APIKeyQuery, APIKeyHeader, HttpBasicAuth, django_auth_superuser
class GlobalAuth(HttpBearer):
def authenticate(self, request, token):
def auth_using_token(token, request: Optional[HttpRequest]=None) -> Optional[AbstractBaseUser]:
"""Given an API token string, check if a corresponding non-expired APIToken exists, and return its user"""
from api.models import APIToken # lazy import model to avoid loading it at urls.py import time
user = None
submitted_empty_form = token in ('string', '', None)
if submitted_empty_form:
user = request.user # see if user is authed via django session and use that as the default
else:
try:
return Token.objects.get(token=token).user
except Token.DoesNotExist:
token = APIToken.objects.get(token=token)
if token.is_valid():
user = token.user
except APIToken.DoesNotExist:
pass
if not user:
print('[❌] Failed to authenticate API user using API Key:', request)
class AuthSchema(Schema):
email: str
password: str
return None
@router.post("/authenticate", auth=None) # overriding global auth
def get_token(request, auth_data: AuthSchema):
user = authenticate(username=auth_data.email, password=auth_data.password)
if user:
# Assuming a user can have multiple tokens and you want to create a new one every time
new_token = Token.objects.create(user=user)
return {"token": new_token.token, "expires": new_token.expiry_as_iso8601}
def auth_using_password(username, password, request: Optional[HttpRequest]=None) -> Optional[AbstractBaseUser]:
"""Given a username and password, check if they are valid and return the corresponding user"""
user = None
submitted_empty_form = (username, password) in (('string', 'string'), ('', ''), (None, None))
if submitted_empty_form:
user = request.user # see if user is authed via django session and use that as the default
else:
return {"error": "Invalid credentials"}
user = authenticate(
username=username,
password=password,
)
if not user:
print('[❌] Failed to authenticate API user using username & password:', request)
return user
class TokenValidationSchema(Schema):
token: str
### Base Auth Types
class APITokenAuthCheck:
"""The base class for authentication methods that use an api.models.APIToken"""
def authenticate(self, request: HttpRequest, key: Optional[str]=None) -> Optional[AbstractBaseUser]:
user = auth_using_token(
token=key,
request=request,
)
if user is not None:
login(request, user, backend='django.contrib.auth.backends.ModelBackend')
return user
class UserPassAuthCheck:
"""The base class for authentication methods that use a username & password"""
def authenticate(self, request: HttpRequest, username: Optional[str]=None, password: Optional[str]=None) -> Optional[AbstractBaseUser]:
user = auth_using_password(
username=username,
password=password,
request=request,
)
if user is not None:
login(request, user, backend='django.contrib.auth.backends.ModelBackend')
return user
@router.post("/validate_token", auth=None) # No authentication required for this endpoint
def validate_token(request, token_data: TokenValidationSchema):
try:
# Attempt to authenticate using the provided token
user = GlobalAuth().authenticate(request, token_data.token)
if user:
return {"status": "valid"}
else:
return {"status": "invalid"}
except Token.DoesNotExist:
return {"status": "invalid"}
### Django-Ninja-Provided Auth Methods
class UsernameAndPasswordAuth(UserPassAuthCheck, HttpBasicAuth):
"""Allow authenticating by passing username & password via HTTP Basic Authentication (not recommended)"""
pass
class QueryParamTokenAuth(APITokenAuthCheck, APIKeyQuery):
"""Allow authenticating by passing api_key=xyz as a GET/POST query parameter"""
param_name = "api_key"
class HeaderTokenAuth(APITokenAuthCheck, APIKeyHeader):
"""Allow authenticating by passing X-API-Key=xyz as a request header"""
param_name = "X-API-Key"
class BearerTokenAuth(APITokenAuthCheck, HttpBearer):
"""Allow authenticating by passing Bearer=xyz as a request header"""
pass
### Enabled Auth Methods
API_AUTH_METHODS = [
QueryParamTokenAuth(),
HeaderTokenAuth(),
BearerTokenAuth(),
django_auth_superuser,
UsernameAndPasswordAuth(),
]
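With all four methods enabled, the same endpoint accepts a key however the client prefers to send it. A sketch of each style using python-requests (the localhost URL and token value are placeholders; the /api/v1/core/snapshots path comes from the v1 routers defined below):

import requests

token = '1234567890abcdef1234567890abcdef'  # hypothetical APIToken.token value

# 1. as an api_key=xyz query parameter (QueryParamTokenAuth)
requests.get('http://localhost:8000/api/v1/core/snapshots', params={'api_key': token})

# 2. as an X-API-Key request header (HeaderTokenAuth)
requests.get('http://localhost:8000/api/v1/core/snapshots', headers={'X-API-Key': token})

# 3. as a Bearer token (BearerTokenAuth)
requests.get('http://localhost:8000/api/v1/core/snapshots', headers={'Authorization': f'Bearer {token}'})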

archivebox/api/migrations/0001_initial.py

@@ -1,9 +1,10 @@
# Generated by Django 3.1.14 on 2024-04-09 18:52
# Generated by Django 4.2.11 on 2024-04-25 04:19
import api.models
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
import uuid
class Migration(migrations.Migration):
@@ -16,13 +17,13 @@ class Migration(migrations.Migration):
operations = [
migrations.CreateModel(
name='Token',
name='APIToken',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('token', models.CharField(default=auth.models.hex_uuid, max_length=32, unique=True)),
('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
('token', models.CharField(default=api.models.generate_secret_token, max_length=32, unique=True)),
('created', models.DateTimeField(auto_now_add=True)),
('expiry', models.DateTimeField(blank=True, null=True)),
('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='tokens', to=settings.AUTH_USER_MODEL)),
('expires', models.DateTimeField(blank=True, null=True)),
('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
],
),
]


@@ -0,0 +1,17 @@
# Generated by Django 5.0.4 on 2024-04-26 05:28
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('api', '0001_initial'),
]
operations = [
migrations.AlterModelOptions(
name='apitoken',
options={'verbose_name': 'API Key', 'verbose_name_plural': 'API Keys'},
),
]

archivebox/api/models.py

@@ -1,30 +1,63 @@
__package__ = 'archivebox.api'
import uuid
import secrets
from datetime import timedelta
from django.conf import settings
from django.db import models
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
def hex_uuid():
return uuid.uuid4().hex
from django_stubs_ext.db.models import TypedModelMeta
class Token(models.Model):
user = models.ForeignKey(
settings.AUTH_USER_MODEL, on_delete=models.CASCADE, related_name="tokens"
)
token = models.CharField(max_length=32, default=hex_uuid, unique=True)
def generate_secret_token() -> str:
# returns cryptographically secure string with len() == 32
return secrets.token_hex(16)
class APIToken(models.Model):
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
token = models.CharField(max_length=32, default=generate_secret_token, unique=True)
created = models.DateTimeField(auto_now_add=True)
expiry = models.DateTimeField(null=True, blank=True)
expires = models.DateTimeField(null=True, blank=True)
class Meta(TypedModelMeta):
verbose_name = "API Key"
verbose_name_plural = "API Keys"
def __str__(self) -> str:
return self.token
def __repr__(self) -> str:
return f'<APIToken user={self.user.username} token=************{self.token[-4:]}>'
def __json__(self) -> dict:
return {
"TYPE": "APIToken",
"id": str(self.id),
"user_id": str(self.user.id),
"user_username": self.user.username,
"token": self.token,
"created": self.created.isoformat(),
"expires": self.expires_as_iso8601,
}
@property
def expiry_as_iso8601(self):
def expires_as_iso8601(self):
"""Returns the expiry date of the token in ISO 8601 format or a date 100 years in the future if none."""
expiry_date = (
self.expiry if self.expiry else timezone.now() + timedelta(days=365 * 100)
)
expiry_date = self.expires or (timezone.now() + timedelta(days=365 * 100))
return expiry_date.isoformat()
def __str__(self):
return self.token
def is_valid(self, for_date=None):
for_date = for_date or timezone.now()
if self.expires and self.expires < for_date:
return False
return True
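A quick sketch of how the new model behaves, e.g. inside a Django shell (the user lookup is illustrative):

from datetime import timedelta
from django.contrib.auth import get_user_model
from django.utils import timezone
from api.models import APIToken

user = get_user_model().objects.first()
token = APIToken.objects.create(user=user)          # token string auto-generated: 32 hex chars
assert token.is_valid()                             # True: no expiry set, treated as non-expiring
token.expires = timezone.now() - timedelta(days=1)
assert not token.is_valid()                         # False: the expiry date has passed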

archivebox/api/tests.py

@@ -1,27 +1,30 @@
__package__ = 'archivebox.api'
from django.test import TestCase
from ninja.testing import TestClient
from archivebox.api.archive import router as archive_router
class ArchiveBoxAPITestCase(TestCase):
from .routes_cli import router
class ArchiveBoxCLIAPITestCase(TestCase):
def setUp(self):
self.client = TestClient(archive_router)
self.client = TestClient(router)
def test_add_endpoint(self):
response = self.client.post("/add", json={"urls": ["http://example.com"], "tag": "test"})
response = self.client.post("/add", json={"urls": ["http://example.com"], "tag": "testTag1,testTag2"})
self.assertEqual(response.status_code, 200)
self.assertEqual(response.json()["status"], "success")
self.assertTrue(response.json()["success"])
def test_remove_endpoint(self):
response = self.client.post("/remove", json={"filter_patterns": ["http://example.com"]})
self.assertEqual(response.status_code, 200)
self.assertEqual(response.json()["status"], "success")
self.assertTrue(response.json()["success"])
def test_update_endpoint(self):
response = self.client.post("/update", json={})
self.assertEqual(response.status_code, 200)
self.assertEqual(response.json()["status"], "success")
self.assertTrue(response.json()["success"])
def test_list_all_endpoint(self):
response = self.client.post("/list_all", json={})
self.assertEqual(response.status_code, 200)
self.assertTrue("success" in response.json()["status"])
self.assertTrue(response.json()["success"])

archivebox/api/urls.py (new file, 17 lines)

@@ -0,0 +1,17 @@
__package__ = 'archivebox.api'
from django.urls import path
from django.views.generic.base import RedirectView
from .v1_api import urls as v1_api_urls
urlpatterns = [
path("", RedirectView.as_view(url='/api/v1')),
path("v1/", v1_api_urls),
path("v1", RedirectView.as_view(url='/api/v1/docs')),
# ... v2 can be added here ...
# path("v2/", v2_api_urls),
# path("v2", RedirectView.as_view(url='/api/v2/docs')),
]

archivebox/api/v1_api.py (new file, 111 lines)

@@ -0,0 +1,111 @@
__package__ = 'archivebox.api'
from io import StringIO
from traceback import format_exception
from contextlib import redirect_stdout, redirect_stderr
from django.http import HttpRequest, HttpResponse
from django.core.exceptions import ObjectDoesNotExist, EmptyResultSet, PermissionDenied
from ninja import NinjaAPI, Swagger
# TODO: explore adding https://eadwincode.github.io/django-ninja-extra/
from api.auth import API_AUTH_METHODS
from ..config import VERSION, COMMIT_HASH
COMMIT_HASH = COMMIT_HASH or 'unknown'
html_description=f'''
<h3>Welcome to your ArchiveBox server's REST API <code>[v1 ALPHA]</code> homepage!</h3>
<br/>
<i><b>WARNING: This API is still in an early development stage and may change!</b></i>
<br/>
<ul>
<li> Manage your server: <a href="/admin/api/"><b>Setup API Keys</b></a>, <a href="/admin/">Go to your Server Admin UI</a>, <a href="/">Go to your Snapshots list</a>
<li>💬 Ask questions and get help here: <a href="https://zulip.archivebox.io">ArchiveBox Chat Forum</a></li>
<li>🐞 Report API bugs here: <a href="https://github.com/ArchiveBox/ArchiveBox/issues">Github Issues</a></li>
<li>📚 ArchiveBox Documentation: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki">Github Wiki</a></li>
<li>📜 See the API source code: <a href="https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/api"><code>archivebox/api/</code></a></li>
</ul>
<small>Served by ArchiveBox v{VERSION} (<a href="https://github.com/ArchiveBox/ArchiveBox/commit/{COMMIT_HASH}"><code>{COMMIT_HASH[:8]}</code></a>), API powered by <a href="https://django-ninja.dev/"><code>django-ninja</code></a>.</small>
'''
def register_urls(api: NinjaAPI) -> NinjaAPI:
api.add_router('/auth/', 'api.v1_auth.router')
api.add_router('/core/', 'api.v1_core.router')
api.add_router('/cli/', 'api.v1_cli.router')
return api
class NinjaAPIWithIOCapture(NinjaAPI):
def create_temporal_response(self, request: HttpRequest) -> HttpResponse:
stdout, stderr = StringIO(), StringIO()
with redirect_stderr(stderr):
with redirect_stdout(stdout):
request.stdout = stdout
request.stderr = stderr
response = super().create_temporal_response(request)
print('RESPONDING NOW', response)
return response
api = NinjaAPIWithIOCapture(
title='ArchiveBox API',
description=html_description,
version='1.0.0',
csrf=False,
auth=API_AUTH_METHODS,
urls_namespace="api",
docs=Swagger(settings={"persistAuthorization": True}),
# docs_decorator=login_required,
# renderer=ORJSONRenderer(),
)
api = register_urls(api)
urls = api.urls
@api.exception_handler(Exception)
def generic_exception_handler(request, err):
status = 503
if isinstance(err, (ObjectDoesNotExist, EmptyResultSet, PermissionDenied)):
status = 404
print(''.join(format_exception(err)))
return api.create_response(
request,
{
"succeeded": False,
"message": f'{err.__class__.__name__}: {err}',
"errors": [
''.join(format_exception(err)),
# or send simpler parent-only traceback:
# *([str(err.__context__)] if getattr(err, '__context__', None) else []),
],
},
status=status,
)
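Uncaught exceptions thus come back to the client as one predictable JSON envelope; a representative payload (field values are illustrative):

# HTTP 404 for ObjectDoesNotExist/EmptyResultSet/PermissionDenied, HTTP 503 for anything else
{
    "succeeded": False,
    "message": "ObjectDoesNotExist: Snapshot matching query does not exist.",
    "errors": [
        "<the full format_exception() traceback, joined into one string>",
    ],
}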
# import orjson
# from ninja.renderers import BaseRenderer
# class ORJSONRenderer(BaseRenderer):
# media_type = "application/json"
# def render(self, request, data, *, response_status):
# return {
# "success": True,
# "errors": [],
# "result": data,
# "stdout": ansi_to_html(stdout.getvalue().strip()),
# "stderr": ansi_to_html(stderr.getvalue().strip()),
# }
# return orjson.dumps(data)

archivebox/api/v1_auth.py (new file, 52 lines)

@@ -0,0 +1,52 @@
__package__ = 'archivebox.api'
from typing import Optional
from ninja import Router, Schema
from api.models import APIToken
from api.auth import auth_using_token, auth_using_password
router = Router(tags=['Authentication'])
class PasswordAuthSchema(Schema):
"""Schema for a /get_api_token request"""
username: Optional[str] = None
password: Optional[str] = None
@router.post("/get_api_token", auth=None, summary='Generate an API token for a given username & password (or currently logged-in user)') # auth=None because they are not authed yet
def get_api_token(request, auth_data: PasswordAuthSchema):
user = auth_using_password(
username=auth_data.username,
password=auth_data.password,
request=request,
)
if user:
# TODO: support multiple tokens in the future, for now we just have one per user
api_token, created = APIToken.objects.get_or_create(user=user)
return api_token.__json__()
return {"success": False, "errors": ["Invalid credentials"]}
class TokenAuthSchema(Schema):
"""Schema for a /check_api_token request"""
token: str
@router.post("/check_api_token", auth=None, summary='Validate an API token to make sure its valid and non-expired') # auth=None because they are not authed yet
def check_api_token(request, token_data: TokenAuthSchema):
user = auth_using_token(
token=token_data.token,
request=request,
)
if user:
return {"success": True, "user_id": str(user.id)}
return {"success": False, "user_id": None}

archivebox/api/v1_cli.py (new file, 234 lines)

@@ -0,0 +1,234 @@
__package__ = 'archivebox.api'
from typing import List, Dict, Any, Optional
from enum import Enum
from ninja import Router, Schema
from ..main import (
add,
remove,
update,
list_all,
schedule,
)
from ..util import ansi_to_html
from ..config import ONLY_NEW
# router for API that exposes archivebox cli subcommands as REST endpoints
router = Router(tags=['ArchiveBox CLI Sub-Commands'])
# Schemas
JSONType = List[Any] | Dict[str, Any] | bool | int | str | None
class CLICommandResponseSchema(Schema):
success: bool
errors: List[str]
result: JSONType
stdout: str
stderr: str
class FilterTypeChoices(str, Enum):
exact = 'exact'
substring = 'substring'
regex = 'regex'
domain = 'domain'
tag = 'tag'
timestamp = 'timestamp'
class StatusChoices(str, Enum):
indexed = 'indexed'
archived = 'archived'
unarchived = 'unarchived'
present = 'present'
valid = 'valid'
invalid = 'invalid'
duplicate = 'duplicate'
orphaned = 'orphaned'
corrupted = 'corrupted'
unrecognized = 'unrecognized'
class AddCommandSchema(Schema):
urls: List[str]
tag: str = ""
depth: int = 0
update: bool = not ONLY_NEW # Default to the opposite of ONLY_NEW
update_all: bool = False
index_only: bool = False
overwrite: bool = False
init: bool = False
extractors: str = ""
parser: str = "auto"
class UpdateCommandSchema(Schema):
resume: Optional[float] = 0
only_new: bool = ONLY_NEW
index_only: bool = False
overwrite: bool = False
after: Optional[float] = 0
before: Optional[float] = 999999999999999
status: Optional[StatusChoices] = StatusChoices.unarchived
filter_type: Optional[str] = FilterTypeChoices.substring
filter_patterns: Optional[List[str]] = ['https://example.com']
extractors: Optional[str] = ""
class ScheduleCommandSchema(Schema):
import_path: Optional[str] = None
add: bool = False
show: bool = False
every: Optional[str] = None
tag: str = ''
depth: int = 0
overwrite: bool = False
update: bool = not ONLY_NEW
clear: bool = False
class ListCommandSchema(Schema):
filter_patterns: Optional[List[str]] = ['https://example.com']
filter_type: str = FilterTypeChoices.substring
status: Optional[StatusChoices] = StatusChoices.indexed
after: Optional[float] = 0
before: Optional[float] = 999999999999999
sort: str = 'added'
as_json: bool = True
as_html: bool = False
as_csv: str | bool = 'timestamp,url'
with_headers: bool = False
class RemoveCommandSchema(Schema):
delete: bool = True
after: Optional[float] = 0
before: Optional[float] = 999999999999999
filter_type: str = FilterTypeChoices.exact
filter_patterns: Optional[List[str]] = ['https://example.com']
@router.post("/add", response=CLICommandResponseSchema, summary='archivebox add [args] [urls]')
def cli_add(request, args: AddCommandSchema):
result = add(
urls=args.urls,
tag=args.tag,
depth=args.depth,
update=args.update,
update_all=args.update_all,
index_only=args.index_only,
overwrite=args.overwrite,
init=args.init,
extractors=args.extractors,
parser=args.parser,
)
return {
"success": True,
"errors": [],
"result": result,
"stdout": ansi_to_html(request.stdout.getvalue().strip()),
"stderr": ansi_to_html(request.stderr.getvalue().strip()),
}
@router.post("/update", response=CLICommandResponseSchema, summary='archivebox update [args] [filter_patterns]')
def cli_update(request, args: UpdateCommandSchema):
result = update(
resume=args.resume,
only_new=args.only_new,
index_only=args.index_only,
overwrite=args.overwrite,
before=args.before,
after=args.after,
status=args.status,
filter_type=args.filter_type,
filter_patterns=args.filter_patterns,
extractors=args.extractors,
)
return {
"success": True,
"errors": [],
"result": result,
"stdout": ansi_to_html(request.stdout.getvalue().strip()),
"stderr": ansi_to_html(request.stderr.getvalue().strip()),
}
@router.post("/schedule", response=CLICommandResponseSchema, summary='archivebox schedule [args] [import_path]')
def cli_schedule(request, args: ScheduleCommandSchema):
result = schedule(
import_path=args.import_path,
add=args.add,
show=args.show,
clear=args.clear,
every=args.every,
tag=args.tag,
depth=args.depth,
overwrite=args.overwrite,
update=args.update,
)
return {
"success": True,
"errors": [],
"result": result,
"stdout": ansi_to_html(request.stdout.getvalue().strip()),
"stderr": ansi_to_html(request.stderr.getvalue().strip()),
}
@router.post("/list", response=CLICommandResponseSchema, summary='archivebox list [args] [filter_patterns]')
def cli_list(request, args: ListCommandSchema):
result = list_all(
filter_patterns=args.filter_patterns,
filter_type=args.filter_type,
status=args.status,
after=args.after,
before=args.before,
sort=args.sort,
csv=args.as_csv,
json=args.as_json,
html=args.as_html,
with_headers=args.with_headers,
)
result_format = 'txt'
if args.as_json:
result_format = "json"
elif args.as_html:
result_format = "html"
elif args.as_csv:
result_format = "csv"
return {
"success": True,
"errors": [],
"result": result,
"result_format": result_format,
"stdout": ansi_to_html(request.stdout.getvalue().strip()),
"stderr": ansi_to_html(request.stderr.getvalue().strip()),
}
@router.post("/remove", response=CLICommandResponseSchema, summary='archivebox remove [args] [filter_patterns]')
def cli_remove(request, args: RemoveCommandSchema):
result = remove(
yes=True, # no way to interactively ask for confirmation via API, so we force yes
delete=args.delete,
before=args.before,
after=args.after,
filter_type=args.filter_type,
filter_patterns=args.filter_patterns,
)
return {
"success": True,
"errors": [],
"result": result,
"stdout": ansi_to_html(request.stdout.getvalue().strip()),
"stderr": ansi_to_html(request.stderr.getvalue().strip()),
}
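Each endpoint mirrors its CLI counterpart, so driving the server remotely looks like this sketch (URL and token are placeholders; any of the auth methods from api/auth.py work):

import requests

resp = requests.post(
    'http://localhost:8000/api/v1/cli/add',
    headers={'X-API-Key': '1234567890abcdef1234567890abcdef'},  # hypothetical token
    json={'urls': ['https://example.com'], 'tag': 'docs', 'depth': 0},
)
result = resp.json()
print(result['success'])  # True on success
print(result['stdout'])   # the CLI's ANSI output, converted to HTML by ansi_to_html()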

archivebox/api/v1_core.py (new file, 210 lines)

@@ -0,0 +1,210 @@
__package__ = 'archivebox.api'
from uuid import UUID
from typing import List, Optional
from datetime import datetime
from django.shortcuts import get_object_or_404
from ninja import Router, Schema, FilterSchema, Field, Query
from ninja.pagination import paginate
from core.models import Snapshot, ArchiveResult, Tag
router = Router(tags=['Core Models'])
### ArchiveResult #########################################################################
class ArchiveResultSchema(Schema):
id: UUID
snapshot_id: UUID
snapshot_url: str
snapshot_tags: str
extractor: str
cmd: List[str]
pwd: str
cmd_version: str
output: str
status: str
created: datetime
@staticmethod
def resolve_id(obj):
return obj.uuid
@staticmethod
def resolve_created(obj):
return obj.start_ts
@staticmethod
def resolve_snapshot_url(obj):
return obj.snapshot.url
@staticmethod
def resolve_snapshot_tags(obj):
return obj.snapshot.tags_str()
class ArchiveResultFilterSchema(FilterSchema):
id: Optional[UUID] = Field(None, q='uuid')
search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains'])
snapshot_id: Optional[UUID] = Field(None, q='snapshot_id')
snapshot_url: Optional[str] = Field(None, q='snapshot__url')
snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name')
status: Optional[str] = Field(None, q='status')
output: Optional[str] = Field(None, q='output__icontains')
extractor: Optional[str] = Field(None, q='extractor__icontains')
cmd: Optional[str] = Field(None, q='cmd__0__icontains')
pwd: Optional[str] = Field(None, q='pwd__icontains')
cmd_version: Optional[str] = Field(None, q='cmd_version')
created: Optional[datetime] = Field(None, q='updated')
created__gte: Optional[datetime] = Field(None, q='updated__gte')
created__lt: Optional[datetime] = Field(None, q='updated__lt')
@router.get("/archiveresults", response=List[ArchiveResultSchema])
@paginate
def list_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)):
qs = ArchiveResult.objects.all()
results = filters.filter(qs)
return results
@router.get("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema)
def get_archiveresult(request, archiveresult_id: str):
archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
return archiveresult
# @router.post("/archiveresult", response=ArchiveResultSchema)
# def create_archiveresult(request, payload: ArchiveResultSchema):
# archiveresult = ArchiveResult.objects.create(**payload.dict())
# return archiveresult
#
# @router.put("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema)
# def update_archiveresult(request, archiveresult_id: str, payload: ArchiveResultSchema):
# archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
#
# for attr, value in payload.dict().items():
# setattr(archiveresult, attr, value)
# archiveresult.save()
#
# return archiveresult
#
# @router.delete("/archiveresult/{archiveresult_id}")
# def delete_archiveresult(request, archiveresult_id: str):
# archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
# archiveresult.delete()
# return {"success": True}
### Snapshot #########################################################################
class SnapshotSchema(Schema):
id: UUID
url: str
tags: str
title: Optional[str]
timestamp: str
bookmarked: datetime
added: datetime
updated: datetime
archive_path: str
archiveresults: List[ArchiveResultSchema]
# @staticmethod
# def resolve_id(obj):
# return str(obj.id)
@staticmethod
def resolve_tags(obj):
return obj.tags_str()
@staticmethod
def resolve_archiveresults(obj, context):
if context['request'].with_archiveresults:
return obj.archiveresult_set.all().distinct()
return ArchiveResult.objects.none()
class SnapshotFilterSchema(FilterSchema):
id: Optional[UUID] = Field(None, q='id')
search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains'])
url: Optional[str] = Field(None, q='url')
tag: Optional[str] = Field(None, q='tags__name')
title: Optional[str] = Field(None, q='title__icontains')
timestamp: Optional[str] = Field(None, q='timestamp__startswith')
added: Optional[datetime] = Field(None, q='added')
added__gte: Optional[datetime] = Field(None, q='added__gte')
added__lt: Optional[datetime] = Field(None, q='added__lt')
@router.get("/snapshots", response=List[SnapshotSchema])
@paginate
def list_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool=True):
request.with_archiveresults = with_archiveresults
qs = Snapshot.objects.all()
results = filters.filter(qs)
return results
@router.get("/snapshot/{snapshot_id}", response=SnapshotSchema)
def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
request.with_archiveresults = with_archiveresults
snapshot = get_object_or_404(Snapshot, id=snapshot_id)
return snapshot
# @router.post("/snapshot", response=SnapshotSchema)
# def create_snapshot(request, payload: SnapshotSchema):
# snapshot = Snapshot.objects.create(**payload.dict())
# return snapshot
#
# @router.put("/snapshot/{snapshot_id}", response=SnapshotSchema)
# def update_snapshot(request, snapshot_id: str, payload: SnapshotSchema):
# snapshot = get_object_or_404(Snapshot, id=snapshot_id)
#
# for attr, value in payload.dict().items():
# setattr(snapshot, attr, value)
# snapshot.save()
#
# return snapshot
#
# @router.delete("/snapshot/{snapshot_id}")
# def delete_snapshot(request, snapshot_id: str):
# snapshot = get_object_or_404(Snapshot, id=snapshot_id)
# snapshot.delete()
# return {"success": True}
### Tag #########################################################################
class TagSchema(Schema):
name: str
slug: str
@router.get("/tags", response=List[TagSchema])
def list_tags(request):
return Tag.objects.all()
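Combined with FilterSchema and @paginate, the list endpoints accept any subset of the filter fields as query parameters; a sketch of a filtered listing (URL and token are placeholders):

import requests

resp = requests.get(
    'http://localhost:8000/api/v1/core/snapshots',
    params={'search': 'example.com', 'with_archiveresults': 'false'},
    headers={'X-API-Key': '1234567890abcdef1234567890abcdef'},  # hypothetical token
)
page = resp.json()  # django-ninja's default paginator returns {"items": [per-snapshot dicts], "count": total}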

archivebox/config.py

@@ -112,7 +112,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'LDAP_FIRSTNAME_ATTR': {'type': str, 'default': None},
'LDAP_LASTNAME_ATTR': {'type': str, 'default': None},
'LDAP_EMAIL_ATTR': {'type': str, 'default': None},
'LDAP_CREATE_SUPERUSER': {'type': bool, 'default': False},
'LDAP_CREATE_SUPERUSER': {'type': bool, 'default': False},
},
'ARCHIVE_METHOD_TOGGLES': {
@@ -265,7 +265,7 @@ CONFIG_ALIASES = {
for key, default in section.items()
for alias in default.get('aliases', ())
}
USER_CONFIG = {key for section in CONFIG_SCHEMA.values() for key in section.keys()}
USER_CONFIG = {key: section[key] for section in CONFIG_SCHEMA.values() for key in section.keys()}
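# i.e. USER_CONFIG is no longer a flat set of key names but a key -> schema mapping,
# so callers can look up a key's type and default directly, e.g. (sketch):
#   USER_CONFIG['LDAP_CREATE_SUPERUSER']   # -> {'type': bool, 'default': False}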
def get_real_name(key: str) -> str:
"""get the current canonical name for a given deprecated config key"""
@@ -282,6 +282,7 @@ ARCHIVE_DIR_NAME = 'archive'
SOURCES_DIR_NAME = 'sources'
LOGS_DIR_NAME = 'logs'
PERSONAS_DIR_NAME = 'personas'
CRONTABS_DIR_NAME = 'crontabs'
SQL_INDEX_FILENAME = 'index.sqlite3'
JSON_INDEX_FILENAME = 'index.json'
HTML_INDEX_FILENAME = 'index.html'
@@ -355,7 +356,7 @@ ALLOWED_IN_OUTPUT_DIR = {
'static',
'sonic',
'search.sqlite3',
'crontabs',
CRONTABS_DIR_NAME,
ARCHIVE_DIR_NAME,
SOURCES_DIR_NAME,
LOGS_DIR_NAME,
@@ -598,7 +599,6 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
'DEPENDENCIES': {'default': lambda c: get_dependency_info(c)},
'CODE_LOCATIONS': {'default': lambda c: get_code_locations(c)},
'EXTERNAL_LOCATIONS': {'default': lambda c: get_external_locations(c)},
'DATA_LOCATIONS': {'default': lambda c: get_data_locations(c)},
'CHROME_OPTIONS': {'default': lambda c: get_chrome_info(c)},
'CHROME_EXTRA_ARGS': {'default': lambda c: c['CHROME_EXTRA_ARGS'] or []},
@@ -985,11 +985,6 @@ def get_code_locations(config: ConfigDict) -> SimpleConfigValueDict:
'enabled': True,
'is_valid': (config['TEMPLATES_DIR'] / 'static').exists(),
},
'CUSTOM_TEMPLATES_DIR': {
'path': config['CUSTOM_TEMPLATES_DIR'] and Path(config['CUSTOM_TEMPLATES_DIR']).resolve(),
'enabled': bool(config['CUSTOM_TEMPLATES_DIR']),
'is_valid': config['CUSTOM_TEMPLATES_DIR'] and Path(config['CUSTOM_TEMPLATES_DIR']).exists(),
},
# 'NODE_MODULES_DIR': {
# 'path': ,
# 'enabled': ,
@@ -997,50 +992,25 @@ def get_code_locations(config: ConfigDict) -> SimpleConfigValueDict:
# },
}
def get_external_locations(config: ConfigDict) -> ConfigValue:
abspath = lambda path: None if path is None else Path(path).resolve()
return {
'CHROME_USER_DATA_DIR': {
'path': abspath(config['CHROME_USER_DATA_DIR']),
'enabled': config['USE_CHROME'] and config['CHROME_USER_DATA_DIR'],
'is_valid': False if config['CHROME_USER_DATA_DIR'] is None else (Path(config['CHROME_USER_DATA_DIR']) / 'Default').exists(),
},
'COOKIES_FILE': {
'path': abspath(config['COOKIES_FILE']),
'enabled': config['USE_WGET'] and config['COOKIES_FILE'],
'is_valid': False if config['COOKIES_FILE'] is None else Path(config['COOKIES_FILE']).exists(),
},
}
def get_data_locations(config: ConfigDict) -> ConfigValue:
return {
# OLD: migrating to personas
# 'CHROME_USER_DATA_DIR': {
# 'path': os.path.abspath(config['CHROME_USER_DATA_DIR']),
# 'enabled': config['USE_CHROME'] and config['CHROME_USER_DATA_DIR'],
# 'is_valid': False if config['CHROME_USER_DATA_DIR'] is None else (Path(config['CHROME_USER_DATA_DIR']) / 'Default').exists(),
# },
# 'COOKIES_FILE': {
# 'path': os.path.abspath(config['COOKIES_FILE']),
# 'enabled': config['USE_WGET'] and config['COOKIES_FILE'],
# 'is_valid': False if config['COOKIES_FILE'] is None else Path(config['COOKIES_FILE']).exists(),
# },
'OUTPUT_DIR': {
'path': config['OUTPUT_DIR'].resolve(),
'enabled': True,
'is_valid': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).exists(),
'is_mount': os.path.ismount(config['OUTPUT_DIR'].resolve()),
},
'SOURCES_DIR': {
'path': config['SOURCES_DIR'].resolve(),
'enabled': True,
'is_valid': config['SOURCES_DIR'].exists(),
},
'LOGS_DIR': {
'path': config['LOGS_DIR'].resolve(),
'enabled': True,
'is_valid': config['LOGS_DIR'].exists(),
},
'PERSONAS_DIR': {
'path': config['PERSONAS_DIR'].resolve(),
'enabled': True,
'is_valid': config['PERSONAS_DIR'].exists(),
},
'ARCHIVE_DIR': {
'path': config['ARCHIVE_DIR'].resolve(),
'enabled': True,
'is_valid': config['ARCHIVE_DIR'].exists(),
'is_mount': os.path.ismount(config['ARCHIVE_DIR'].resolve()),
},
'CONFIG_FILE': {
'path': config['CONFIG_FILE'].resolve(),
'enabled': True,
@@ -1052,6 +1022,38 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
'is_valid': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).exists(),
'is_mount': os.path.ismount((config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).resolve()),
},
'ARCHIVE_DIR': {
'path': config['ARCHIVE_DIR'].resolve(),
'enabled': True,
'is_valid': config['ARCHIVE_DIR'].exists(),
'is_mount': os.path.ismount(config['ARCHIVE_DIR'].resolve()),
},
'SOURCES_DIR': {
'path': config['SOURCES_DIR'].resolve(),
'enabled': True,
'is_valid': config['SOURCES_DIR'].exists(),
},
'LOGS_DIR': {
'path': config['LOGS_DIR'].resolve(),
'enabled': True,
'is_valid': config['LOGS_DIR'].exists(),
},
'CUSTOM_TEMPLATES_DIR': {
'path': config['CUSTOM_TEMPLATES_DIR'] and Path(config['CUSTOM_TEMPLATES_DIR']).resolve(),
'enabled': bool(config['CUSTOM_TEMPLATES_DIR']),
'is_valid': config['CUSTOM_TEMPLATES_DIR'] and Path(config['CUSTOM_TEMPLATES_DIR']).exists(),
},
'PERSONAS_DIR': {
'path': config['PERSONAS_DIR'].resolve(),
'enabled': True,
'is_valid': config['PERSONAS_DIR'].exists(),
},
# managed by bin/docker_entrypoint.sh and python-crontab:
# 'CRONTABS_DIR': {
# 'path': config['CRONTABS_DIR'].resolve(),
# 'enabled': True,
# 'is_valid': config['CRONTABS_DIR'].exists(),
# },
}
def get_dependency_info(config: ConfigDict) -> ConfigValue:
@@ -1366,6 +1368,7 @@ def check_data_folder(out_dir: Union[str, Path, None]=None, config: ConfigDict=C
stderr(' archivebox init')
raise SystemExit(2)
def check_migrations(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG):
output_dir = out_dir or config['OUTPUT_DIR']
from .index.sql import list_migrations

archivebox/core/admin.py

@@ -14,12 +14,17 @@ from django.shortcuts import render, redirect
from django.contrib.auth import get_user_model
from django import forms
from signal_webhooks.apps import DjangoSignalWebhooksConfig
from signal_webhooks.admin import WebhookAdmin, WebhookModel
from ..util import htmldecode, urldecode, ansi_to_html
from core.models import Snapshot, ArchiveResult, Tag
from core.forms import AddLinkForm
from core.mixins import SearchResultsAdminMixin
from api.models import APIToken
from index.html import snapshot_icons
from logging_util import printable_filesize
@@ -98,10 +103,32 @@ class ArchiveBoxAdmin(admin.AdminSite):
return render(template_name='add.html', request=request, context=context)
# monkey patch django-signals-webhooks to change how it shows up in Admin UI
DjangoSignalWebhooksConfig.verbose_name = 'API'
WebhookModel._meta.get_field('name').help_text = 'Give your webhook a descriptive name (e.g. Notify ACME Slack channel of any new ArchiveResults).'
WebhookModel._meta.get_field('signal').help_text = 'The type of event the webhook should fire for (e.g. Create, Update, Delete).'
WebhookModel._meta.get_field('ref').help_text = 'Dot import notation of the model the webhook should fire for (e.g. core.models.Snapshot or core.models.ArchiveResult).'
WebhookModel._meta.get_field('endpoint').help_text = 'External URL to POST the webhook notification to (e.g. https://someapp.example.com/webhook/some-webhook-receiver).'
WebhookModel._meta.app_label = 'api'
archivebox_admin = ArchiveBoxAdmin()
archivebox_admin.register(get_user_model())
archivebox_admin.register(APIToken)
archivebox_admin.register(WebhookModel, WebhookAdmin)
archivebox_admin.disable_action('delete_selected')
# patch admin with methods to add data views
from admin_data_views.admin import get_app_list, admin_data_index_view, get_admin_data_urls, get_urls
archivebox_admin.get_app_list = get_app_list.__get__(archivebox_admin, ArchiveBoxAdmin)
archivebox_admin.admin_data_index_view = admin_data_index_view.__get__(archivebox_admin, ArchiveBoxAdmin)
archivebox_admin.get_admin_data_urls = get_admin_data_urls.__get__(archivebox_admin, ArchiveBoxAdmin)
archivebox_admin.get_urls = get_urls(archivebox_admin.get_urls).__get__(archivebox_admin, ArchiveBoxAdmin)
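(The __get__ calls above use Python's descriptor protocol to graft plain functions onto the already-instantiated admin site as bound methods; a minimal standalone sketch of the same trick:)

class Site:
    pass

def get_app_list(self):
    # stand-in for admin_data_views' replacement method
    return ['app1', 'app2']

site = Site()
site.get_app_list = get_app_list.__get__(site, Site)  # binds the function, so self is supplied
assert site.get_app_list() == ['app1', 'app2']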
class ArchiveResultInline(admin.TabularInline):
model = ArchiveResult

archivebox/core/apps.py

@@ -1,3 +1,5 @@
__package__ = 'archivebox.core'
from django.apps import AppConfig
@@ -5,6 +7,22 @@ class CoreConfig(AppConfig):
name = 'core'
def ready(self):
# register our custom admin as the primary django admin
from django.contrib import admin
from django.contrib.admin import sites
from core.admin import archivebox_admin
admin.site = archivebox_admin
sites.site = archivebox_admin
# register signal handlers
from .auth import register_signals
register_signals()
# from django.contrib.admin.apps import AdminConfig
# class CoreAdminConfig(AdminConfig):
# default_site = "core.admin.get_admin_site"

archivebox/core/auth.py

@@ -1,5 +1,6 @@
import os
from django.conf import settings
__package__ = 'archivebox.core'
from ..config import (
LDAP
)

archivebox/core/auth_ldap.py

@@ -1,10 +1,8 @@
from django.conf import settings
from ..config import (
LDAP_CREATE_SUPERUSER
)
def create_user(sender, user=None, ldap_user=None, **kwargs):
if not user.id and LDAP_CREATE_SUPERUSER:
user.is_superuser = True

archivebox/core/settings.py

@@ -18,6 +18,7 @@ from ..config import (
CUSTOM_TEMPLATES_DIR,
SQL_INDEX_FILENAME,
OUTPUT_DIR,
ARCHIVE_DIR,
LOGS_DIR,
TIMEZONE,
@@ -63,6 +64,9 @@ INSTALLED_APPS = [
'core',
'api',
'admin_data_views',
'signal_webhooks',
'django_extensions',
]
@@ -173,6 +177,17 @@ if DEBUG_TOOLBAR:
]
MIDDLEWARE = [*MIDDLEWARE, 'debug_toolbar.middleware.DebugToolbarMiddleware']
# https://github.com/bensi94/Django-Requests-Tracker (improved version of django-debug-toolbar)
# Must delete archivebox/templates/admin to use because it relies on some things we override
# visit /__requests_tracker__/ to access
DEBUG_REQUESTS_TRACKER = False
if DEBUG_REQUESTS_TRACKER:
INSTALLED_APPS += ["requests_tracker"]
MIDDLEWARE += ["requests_tracker.middleware.requests_tracker_middleware"]
INTERNAL_IPS = ["127.0.0.1", "10.0.2.2", "0.0.0.0", "*"]
################################################################################
### Staticfile and Template Settings
################################################################################
@@ -242,6 +257,29 @@ CACHES = {
EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
STORAGES = {
"default": {
"BACKEND": "django.core.files.storage.FileSystemStorage",
},
"staticfiles": {
"BACKEND": "django.contrib.staticfiles.storage.StaticFilesStorage",
},
"archive": {
"BACKEND": "django.core.files.storage.FileSystemStorage",
"OPTIONS": {
"base_url": "/archive/",
"location": ARCHIVE_DIR,
},
},
# "personas": {
# "BACKEND": "django.core.files.storage.FileSystemStorage",
# "OPTIONS": {
# "base_url": "/personas/",
# "location": PERSONAS_DIR,
# },
# },
}
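As a usage sketch (assumptions: Django 4.2+ storage registry, and a made-up snapshot path), the named 'archive' storage defined above can be looked up and used like any other Storage backend:

# sketch: accessing the named storage defined in STORAGES above
from django.core.files.storage import storages

archive_storage = storages['archive']      # FileSystemStorage rooted at ARCHIVE_DIR
snapshot_file = '1713935445.0/index.json'  # hypothetical path inside ARCHIVE_DIR
if archive_storage.exists(snapshot_file):
    with archive_storage.open(snapshot_file) as f:
        data = f.read()
    public_url = archive_storage.url(snapshot_file)  # -> '/archive/1713935445.0/index.json'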
################################################################################
### Security Settings
################################################################################
@ -368,3 +406,32 @@ LOGGING = {
}
},
}
# Add default webhook configuration to the User model
SIGNAL_WEBHOOKS = {
"HOOKS": {
"django.contrib.auth.models.User": ...,
"core.models.Snapshot": ...,
"core.models.ArchiveResult": ...,
"core.models.Tag": ...,
"api.models.APIToken": ...,
},
}
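Note the bare ... values: that is Python's Ellipsis singleton, which django-signal-webhooks appears to treat as a sentinel meaning "attach the library's default create/update/delete hooks for this model" (a reading of its docs, not verified here). A tiny illustration of the sentinel pattern itself:

# '...' is the Ellipsis singleton, comparable by identity like None
assert ... is Ellipsis
HOOKS = {'core.models.Snapshot': ...}
if HOOKS['core.models.Snapshot'] is ...:
    pass  # i.e. fall back to the library's default webhook behavior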
ADMIN_DATA_VIEWS = {
"NAME": "configuration",
"URLS": [
{
"route": "live/",
"view": "core.views.live_config_list_view",
"name": "live",
"items": {
"route": "<str:key>/",
"view": "core.views.live_config_value_view",
"name": "live_config_value",
},
},
],
}

View file

@ -1,4 +1,4 @@
from .admin import archivebox_admin
__package__ = 'archivebox.core'
from django.urls import path, include
from django.views import static
@ -6,14 +6,9 @@ from django.contrib.staticfiles.urls import staticfiles_urlpatterns
from django.conf import settings
from django.views.generic.base import RedirectView
from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
from .admin import archivebox_admin
from .views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
from ninja import NinjaAPI
from api.auth import GlobalAuth
api = NinjaAPI(auth=GlobalAuth())
api.add_router("/auth/", "api.auth.router")
api.add_router("/archive/", "api.archive.router")
# GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306
# from config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE
@ -43,10 +38,10 @@ urlpatterns = [
path('accounts/', include('django.contrib.auth.urls')),
path('admin/', archivebox_admin.urls),
path("api/", api.urls),
path("api/", include('api.urls')),
path('health/', HealthCheckView.as_view(), name='healthcheck'),
path('error/', lambda _: 1/0),
path('error/', lambda *_: 1/0),
# path('jet_api/', include('jet_django.urls')), Enable to use https://www.jetadmin.io/integrations/django
@ -57,10 +52,10 @@ urlpatterns = [
urlpatterns += staticfiles_urlpatterns()
if settings.DEBUG_TOOLBAR:
import debug_toolbar
urlpatterns += [
path('__debug__/', include(debug_toolbar.urls)),
]
urlpatterns += [path('__debug__/', include("debug_toolbar.urls"))]
if settings.DEBUG_REQUESTS_TRACKER:
urlpatterns += [path("__requests_tracker__/", include("requests_tracker.urls"))]
# # Proposed FUTURE URLs spec

View file

@ -1,10 +1,12 @@
__package__ = 'archivebox.core'
from typing import Callable
from io import StringIO
from contextlib import redirect_stdout
from django.shortcuts import render, redirect
from django.http import HttpResponse, Http404
from django.http import HttpRequest, HttpResponse, Http404
from django.utils.html import format_html, mark_safe
from django.views import View, static
from django.views.generic.list import ListView
@ -14,6 +16,10 @@ from django.contrib.auth.mixins import UserPassesTestMixin
from django.views.decorators.csrf import csrf_exempt
from django.utils.decorators import method_decorator
from admin_data_views.typing import TableContext, ItemContext
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
from core.models import Snapshot
from core.forms import AddLinkForm
@ -26,6 +32,10 @@ from ..config import (
COMMIT_HASH,
FOOTER_INFO,
SNAPSHOTS_PER_PAGE,
CONFIG,
CONFIG_SCHEMA,
DYNAMIC_CONFIG_SCHEMA,
USER_CONFIG,
)
from ..main import add
from ..util import base_url, ansi_to_html
@ -124,9 +134,9 @@ class SnapshotView(View):
'<center><br/><br/><br/>'
f'Snapshot <a href="/archive/{snapshot.timestamp}/index.html" target="_top"><b><code>[{snapshot.timestamp}]</code></b></a> exists in DB, but resource <b><code>{snapshot.timestamp}/'
'{}'
f'</code></b> does not exist in <a href="/archive/{snapshot.timestamp}/" target="_top">snapshot dir</a> yet.<br/><br/>'
'Maybe this resource type is not availabe for this Snapshot,<br/>or the archiving process has not completed yet?<br/>'
f'<pre><code># run this cmd to finish archiving this Snapshot<br/>archivebox update -t timestamp {snapshot.timestamp}</code></pre><br/><br/>'
f'</code></b> does not exist in the <a href="/archive/{snapshot.timestamp}/" target="_top">snapshot dir</a> yet.<br/><br/>'
'It\'s possible that this resource type is not available for the Snapshot,<br/>or that the archiving process has not completed yet.<br/>'
f'<pre><code># if interrupted, run this cmd to finish archiving this Snapshot<br/>archivebox update -t timestamp {snapshot.timestamp}</code></pre><br/><br/>'
'<div class="text-align: left; width: 100%; max-width: 400px">'
'<i><b>Next steps:</i></b><br/>'
f'- list all the <a href="/archive/{snapshot.timestamp}/" target="_top">Snapshot files <code>.*</code></a><br/>'
@ -312,3 +322,124 @@ class HealthCheckView(View):
content_type='text/plain',
status=200
)
def find_config_section(key: str) -> str:
matching_sections = [
name for name, opts in CONFIG_SCHEMA.items() if key in opts
]
section = matching_sections[0] if matching_sections else 'DYNAMIC'
return section
def find_config_default(key: str) -> str | None:
default_val = USER_CONFIG.get(key, {}).get('default', lambda: None)
if isinstance(default_val, Callable):
return None
else:
default_val = repr(default_val)
return default_val
def find_config_type(key: str) -> str:
if key in USER_CONFIG:
return USER_CONFIG[key]['type'].__name__
elif key in DYNAMIC_CONFIG_SCHEMA:
return type(CONFIG[key]).__name__
return 'str'
def key_is_safe(key: str) -> bool:
for term in ('key', 'password', 'secret', 'token'):
if term in key.lower():
return False
return True
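Quick illustration of the redaction predicate above (hypothetical keys):

assert key_is_safe('TIMEOUT') is True
assert key_is_safe('LDAP_PASSWORD') is False  # contains 'password', so the UI shows ********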
@render_with_table_view
def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
rows = {
"Section": [],
"Key": [],
"Type": [],
"Value": [],
"Default": [],
# "Documentation": [],
"Aliases": [],
}
for section in CONFIG_SCHEMA.keys():
for key in CONFIG_SCHEMA[section].keys():
rows['Section'].append(section.replace('_', ' ').title().replace(' Config', ''))
rows['Key'].append(ItemLink(key, key=key))
rows['Type'].append(mark_safe(f'<code>{find_config_type(key)}</code>'))
rows['Value'].append(mark_safe(f'<code>{CONFIG[key]}</code>') if key_is_safe(key) else '******** (redacted)')
rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+%27{key}%27&type=code"><code style="text-decoration: underline">{find_config_default(key) or "See here..."}</code></a>'))
# rows['Documentation'].append(mark_safe(f'Wiki: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">{key}</a>'))
rows['Aliases'].append(', '.join(CONFIG_SCHEMA[section][key].get('aliases', [])))
section = 'DYNAMIC'
for key in DYNAMIC_CONFIG_SCHEMA.keys():
rows['Section'].append(section.replace('_', ' ').title().replace(' Config', ''))
rows['Key'].append(ItemLink(key, key=key))
rows['Type'].append(mark_safe(f'<code>{find_config_type(key)}</code>'))
rows['Value'].append(mark_safe(f'<code>{CONFIG[key]}</code>') if key_is_safe(key) else '******** (redacted)')
rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+%27{key}%27&type=code"><code style="text-decoration: underline">{find_config_default(key) or "See here..."}</code></a>'))
# rows['Documentation'].append(mark_safe(f'Wiki: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">{key}</a>'))
rows['Aliases'].append(ItemLink(key, key=key) if key in USER_CONFIG else '')
return TableContext(
title="Computed Configuration Values",
table=rows,
)
@render_with_item_view
def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
aliases = USER_CONFIG.get(key, {}).get("aliases", [])
return ItemContext(
slug=key,
title=key,
data=[
{
"name": mark_safe(f'data / ArchiveBox.conf &nbsp; [{find_config_section(key)}] &nbsp; <b><code style="color: lightgray">{key}</code></b>' if key in USER_CONFIG else f'[DYNAMIC CONFIG] &nbsp; <b><code style="color: lightgray">{key}</code></b> &nbsp; <small>(calculated at runtime)</small>'),
"description": None,
"fields": {
'Key': key,
'Type': find_config_type(key),
'Value': CONFIG[key] if key_is_safe(key) else '********',
},
"help_texts": {
'Key': mark_safe(f'''
<a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">Documentation</a> &nbsp;
<span style="display: {"inline" if aliases else "none"}">
Aliases: {", ".join(aliases)}
</span>
'''),
'Type': mark_safe(f'''
<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+%27{key}%27&type=code">
See full definition in <code>archivebox/config.py</code>...
</a>
'''),
'Value': mark_safe(f'''
{'<b style="color: red">Value is redacted for your security. (Passwords, secrets, API tokens, etc. cannot be viewed in the Web UI)</b><br/><br/>' if not key_is_safe(key) else ''}
Default: <a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+%27{key}%27&type=code">
<code>{find_config_default(key) or 'See here...'}</code>
</a>
<br/><br/>
<p style="display: {"block" if key in USER_CONFIG else "none"}">
<i>To change this value, edit <code>data/ArchiveBox.conf</code> or run:</i>
<br/><br/>
<code>archivebox config --set {key}="{
val.strip("'")
if (val := find_config_default(key)) else
(repr(CONFIG[key] if key_is_safe(key) else '********')).strip("'")
}"</code>
</p>
'''),
},
},
],
)

View file

@ -4,6 +4,7 @@ WARNING: THIS FILE IS ALL LEGACY CODE TO BE REMOVED.
DO NOT ADD ANY NEW FEATURES TO THIS FILE, NEW CODE GOES HERE: core/models.py
These are the old types we used to use before ArchiveBox v0.4 (before we switched to Django).
"""
__package__ = 'archivebox.index'

View file

@ -494,12 +494,12 @@ def log_removal_started(links: List["Link"], yes: bool, delete: bool):
if delete:
file_counts = [link.num_outputs for link in links if Path(link.link_dir).exists()]
print(
f' {len(links)} Links will be de-listed from the main index, and their archived content folders will be deleted from disk.\n' +
f' {len(links)} Links will be de-listed from the main index, and their archived content folders will be deleted from disk.\n'
f' ({len(file_counts)} data folders with {sum(file_counts)} archived files will be deleted!)'
)
else:
print(
' Matching links will be de-listed from the main index, but their archived content folders will remain in place on disk.\n' +
' Matching links will be de-listed from the main index, but their archived content folders will remain in place on disk.\n'
' (Pass --delete if you also want to permanently delete the data folders)'
)
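The rationale for dropping the trailing '+' in this hunk: adjacent Python string literals are concatenated at compile time, so the operator (and the runtime concat it implies) is redundant inside parentheses. A minimal illustration:

# adjacent string literals are merged at compile time, no '+' needed
msg = (
    'line one\n'
    'line two'
)
assert msg == 'line one\nline two'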

View file

@ -104,7 +104,6 @@ from .config import (
COMMIT_HASH,
BUILD_TIME,
CODE_LOCATIONS,
EXTERNAL_LOCATIONS,
DATA_LOCATIONS,
DEPENDENCIES,
CHROME_BINARY,
@ -231,7 +230,7 @@ def version(quiet: bool=False,
p = platform.uname()
print(
'ArchiveBox v{}'.format(get_version(CONFIG)),
*((f'COMMIT_HASH={COMMIT_HASH[:7]}',) if COMMIT_HASH else ()),
f'COMMIT_HASH={COMMIT_HASH[:7] if COMMIT_HASH else "unknown"}',
f'BUILD_TIME={BUILD_TIME}',
)
print(
@ -272,11 +271,6 @@ def version(quiet: bool=False,
for name, path in CODE_LOCATIONS.items():
print(printable_folder_status(name, path))
print()
print('{white}[i] Secrets locations:{reset}'.format(**ANSI))
for name, path in EXTERNAL_LOCATIONS.items():
print(printable_folder_status(name, path))
print()
if DATA_LOCATIONS['OUTPUT_DIR']['is_valid']:
print('{white}[i] Data locations:{reset}'.format(**ANSI))
@ -695,7 +689,7 @@ def add(urls: Union[str, List[str]],
if CAN_UPGRADE:
hint(f"There's a new version of ArchiveBox available! Your current version is {VERSION}. You can upgrade to {VERSIONS_AVAILABLE['recommended_version']['tag_name']} ({VERSIONS_AVAILABLE['recommended_version']['html_url']}). For more on how to upgrade: https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives\n")
return all_links
return new_links
@enforce_types
def remove(filter_str: Optional[str]=None,
@ -1362,7 +1356,7 @@ def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None:
if (args and "createsuperuser" in args) and (IN_DOCKER and not IS_TTY):
stderr('[!] Warning: you need to pass -it to use interactive commands in docker', color='lightyellow')
stderr(' docker run -it archivebox manage {}'.format(' '.join(args or ['...'])), color='lightyellow')
stderr()
stderr('')
execute_from_command_line([f'{ARCHIVEBOX_BINARY} manage', *(args or ['help'])])

View file

@ -7,7 +7,7 @@ if __name__ == '__main__':
# versions of ./manage.py commands whenever possible. When that's not possible
# (e.g. makemigrations), you can comment out this check temporarily
if not ('makemigrations' in sys.argv or 'migrate' in sys.argv):
if not ('makemigrations' in sys.argv or 'migrate' in sys.argv or 'startapp' in sys.argv):
print("[X] Don't run ./manage.py directly (unless you are a developer running makemigrations):")
print()
print(' Hint: Use these archivebox CLI commands instead of the ./manage.py equivalents:')

View file

@ -7,7 +7,6 @@ For examples of supported import formats see tests/.
__package__ = 'archivebox.parsers'
import re
from io import StringIO
from typing import IO, Tuple, List, Optional
@ -28,7 +27,6 @@ from ..util import (
htmldecode,
download_url,
enforce_types,
URL_REGEX,
)
from ..index.schema import Link
from ..logging_util import TimedProgress, log_source_saved
@ -202,54 +200,3 @@ def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{ba
log_source_saved(source_file=source_path)
return source_path
# Check that plain text regex URL parsing works as expected
# this is last-line-of-defense to make sure the URL_REGEX isn't
# misbehaving due to some OS-level or environment level quirks (e.g. bad regex lib)
# the consequences of bad URL parsing could be disastrous and lead to many
# incorrect/badly parsed links being added to the archive, so this is worth the cost of checking
_test_url_strs = {
'example.com': 0,
'/example.com': 0,
'//example.com': 0,
':/example.com': 0,
'://example.com': 0,
'htt://example8.com': 0,
'/htt://example.com': 0,
'https://example': 1,
'https://localhost/2345': 1,
'https://localhost:1234/123': 1,
'://': 0,
'https://': 0,
'http://': 0,
'ftp://': 0,
'ftp://example.com': 0,
'https://example.com': 1,
'https://example.com/': 1,
'https://a.example.com': 1,
'https://a.example.com/': 1,
'https://a.example.com/what/is/happening.html': 1,
'https://a.example.com/what/ís/happening.html': 1,
'https://a.example.com/what/is/happening.html?what=1&2%20b#höw-about-this=1a': 1,
'https://a.example.com/what/is/happéning/?what=1&2%20b#how-aboüt-this=1a': 1,
'HTtpS://a.example.com/what/is/happening/?what=1&2%20b#how-about-this=1af&2f%20b': 1,
'https://example.com/?what=1#how-about-this=1&2%20baf': 1,
'https://example.com?what=1#how-about-this=1&2%20baf': 1,
'<test>http://example7.com</test>': 1,
'https://<test>': 0,
'https://[test]': 0,
'http://"test"': 0,
'http://\'test\'': 0,
'[https://example8.com/what/is/this.php?what=1]': 1,
'[and http://example9.com?what=1&other=3#and-thing=2]': 1,
'<what>https://example10.com#and-thing=2 "</about>': 1,
'abc<this["https://example11.com/what/is#and-thing=2?whoami=23&where=1"]that>def': 1,
'sdflkf[what](https://example12.com/who/what.php?whoami=1#whatami=2)?am=hi': 1,
'<or>http://examplehttp://15.badc</that>': 2,
'https://a.example.com/one.html?url=http://example.com/inside/of/another?=http://': 2,
'[https://a.example.com/one.html?url=http://example.com/inside/of/another?=](http://a.example.com)': 3,
}
for url_str, num_urls in _test_url_strs.items():
assert len(re.findall(URL_REGEX, url_str)) == num_urls, (
f'{url_str} does not contain {num_urls} urls')

View file

@ -10,7 +10,7 @@ from ..index.schema import Link
from ..util import (
htmldecode,
enforce_types,
URL_REGEX,
find_all_urls,
)
from html.parser import HTMLParser
from urllib.parse import urljoin
@ -40,10 +40,22 @@ def parse_generic_html_export(html_file: IO[str], root_url: Optional[str]=None,
parser.feed(line)
for url in parser.urls:
if root_url:
# resolve relative urls /home.html -> https://example.com/home.html
url = urljoin(root_url, url)
for archivable_url in re.findall(URL_REGEX, url):
url_is_absolute = (url.lower().startswith('http://') or url.lower().startswith('https://'))
# url = https://abc.com => True
# url = /page.php?next=https://example.com => False
if not url_is_absolute: # resolve it by joining it with root_url
relative_path = url
url = urljoin(root_url, relative_path) # https://example.com/somepage.html + /home.html
# => https://example.com/home.html
# special case to handle bug around // handling, crucial for urls that contain sub-urls
# e.g. https://web.archive.org/web/https://example.com
if did_urljoin_misbehave(root_url, relative_path, url):
url = fix_urljoin_bug(url)
for archivable_url in find_all_urls(url):
yield Link(
url=htmldecode(archivable_url),
timestamp=str(datetime.now(timezone.utc).timestamp()),
@ -56,3 +68,74 @@ def parse_generic_html_export(html_file: IO[str], root_url: Optional[str]=None,
KEY = 'html'
NAME = 'Generic HTML'
PARSER = parse_generic_html_export
#### WORKAROUND CODE FOR https://github.com/python/cpython/issues/96015 ####
def did_urljoin_misbehave(root_url: str, relative_path: str, final_url: str) -> bool:
"""
Handle urljoin edge case bug where multiple slashes get turned into a single slash:
- https://github.com/python/cpython/issues/96015
- https://github.com/ArchiveBox/ArchiveBox/issues/1411
This workaround only fixes the most common case of a sub-URL inside an outer URL, e.g.:
https://web.archive.org/web/https://example.com/some/inner/url
But there are other valid URLs containing // that are not fixed by this workaround, e.g.:
https://example.com/drives/C//some/file
"""
# if relative path is actually an absolute url, cut off its own scheme so we check the path component only
relative_path = relative_path.lower()
if relative_path.startswith('http://') or relative_path.startswith('https://'):
relative_path = relative_path.split('://', 1)[-1]
# TODO: properly fix all double // getting stripped by urljoin, not just ://
original_path_had_suburl = '://' in relative_path
original_root_had_suburl = '://' in root_url[8:] # ignore first 8 chars because root always starts with https://
final_joined_has_suburl = '://' in final_url[8:] # ignore first 8 chars because final always starts with https://
urljoin_broke_suburls = (
(original_root_had_suburl or original_path_had_suburl)
and not final_joined_has_suburl
)
return urljoin_broke_suburls
def fix_urljoin_bug(url: str, nesting_limit=5):
"""
recursively replace broken suburls .../http:/... with .../http://...
basically equivalent to this for 99.9% of cases:
url = url.replace('/http:/', '/http://')
url = url.replace('/https:/', '/https://')
except this handles:
other schemes besides http/https (e.g. https://example.com/link/git+ssh://github.com/example)
other preceding separators besides / (e.g. https://example.com/login/?next=https://example.com/home)
fixing multiple suburls recursively
"""
input_url = url
for _ in range(nesting_limit):
url = re.sub(
r'(?P<root>.+?)' # https://web.archive.org/web
+ r'(?P<separator>[-=/_&+%$#@!*\(\\])' # /
+ r'(?P<subscheme>[a-zA-Z0-9+_-]{1,32}?):/' # http:/
+ r'(?P<suburl>[^/\\]+)', # example.com
r"\1\2\3://\4",
input_url,
flags=re.IGNORECASE | re.UNICODE,  # must be passed as flags=; the 4th positional arg of re.sub is count
)
if url == input_url:
break # nothing left to replace, all suburls are fixed
input_url = url
return url
# sanity check to make sure the workaround code works as expected and doesn't introduce *more* bugs
assert did_urljoin_misbehave('https://web.archive.org/web/https://example.com', 'abc.html', 'https://web.archive.org/web/https:/example.com/abc.html') == True
assert did_urljoin_misbehave('http://example.com', 'https://web.archive.org/web/http://example.com/abc.html', 'https://web.archive.org/web/http:/example.com/abc.html') == True
assert fix_urljoin_bug('https:/example.com') == 'https:/example.com' # should not modify original url's scheme, only sub-urls
assert fix_urljoin_bug('https://web.archive.org/web/https:/example.com/abc.html') == 'https://web.archive.org/web/https://example.com/abc.html'
assert fix_urljoin_bug('http://example.com/link/git+ssh:/github.com/example?next=ftp:/example.com') == 'http://example.com/link/git+ssh://github.com/example?next=ftp://example.com'
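Putting the two helpers together, a usage sketch mirroring the parser loop above (the input/output values are taken verbatim from the asserts; the mangled form is what urljoin can emit on affected CPython versions):

root_url = 'https://web.archive.org/web/https://example.com'
relative_path = 'abc.html'
mangled = 'https://web.archive.org/web/https:/example.com/abc.html'  # inner // collapsed

fixed = mangled
if did_urljoin_misbehave(root_url, relative_path, mangled):
    fixed = fix_urljoin_bug(mangled)
assert fixed == 'https://web.archive.org/web/https://example.com/abc.html'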

View file

@ -72,21 +72,13 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]:
json_file.seek(0)
try:
links = json.load(json_file)
if type(links) != list:
raise Exception('JSON parser expects list of objects, maybe this is JSONL?')
except json.decoder.JSONDecodeError:
# sometimes the first line is a comment or other junk, so try without
json_file.seek(0)
first_line = json_file.readline()
#print(' > Trying JSON parser without first line: "', first_line.strip(), '"', sep= '')
links = json.load(json_file)
# we may fail again, which means we really don't know what to do
links = json.load(json_file)
if type(links) != list:
raise Exception('JSON parser expects list of objects, maybe this is JSONL?')
for link in links:
if link:
yield jsonObjectToLink(link,json_file.name)
yield jsonObjectToLink(link, json_file.name)
KEY = 'json'
NAME = 'Generic JSON'
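A concrete illustration of the retry logic above, as a standalone sketch with a made-up export whose first line is junk:

import io
import json

fake_export = io.StringIO('// comment some tools prepend\n[{"url": "https://example.com"}]')
try:
    links = json.load(fake_export)
except json.decoder.JSONDecodeError:
    fake_export.seek(0)
    fake_export.readline()  # skip the junk first line, as the parser above does
    links = json.load(fake_export)
assert links[0]['url'] == 'https://example.com'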

View file

@ -3,11 +3,9 @@ __package__ = 'archivebox.parsers'
import json
from typing import IO, Iterable
from datetime import datetime, timezone
from ..index.schema import Link
from ..util import (
htmldecode,
enforce_types,
)

View file

@ -1,8 +1,6 @@
__package__ = 'archivebox.parsers'
__description__ = 'Plain Text'
import re
from typing import IO, Iterable
from datetime import datetime, timezone
from pathlib import Path
@ -11,7 +9,7 @@ from ..index.schema import Link
from ..util import (
htmldecode,
enforce_types,
URL_REGEX
find_all_urls,
)
@ -39,7 +37,7 @@ def parse_generic_txt_export(text_file: IO[str], **_kwargs) -> Iterable[Link]:
pass
# otherwise look for anything that looks like a URL in the line
for url in re.findall(URL_REGEX, line):
for url in find_all_urls(line):
yield Link(
url=htmldecode(url),
timestamp=str(datetime.now(timezone.utc).timestamp()),
@ -48,17 +46,6 @@ def parse_generic_txt_export(text_file: IO[str], **_kwargs) -> Iterable[Link]:
sources=[text_file.name],
)
# look inside the URL for any sub-urls, e.g. for archive.org links
# https://web.archive.org/web/20200531203453/https://www.reddit.com/r/socialism/comments/gu24ke/nypd_officers_claim_they_are_protecting_the_rule/fsfq0sw/
# -> https://www.reddit.com/r/socialism/comments/gu24ke/nypd_officers_claim_they_are_protecting_the_rule/fsfq0sw/
for sub_url in re.findall(URL_REGEX, line[1:]):
yield Link(
url=htmldecode(sub_url),
timestamp=str(datetime.now(timezone.utc).timestamp()),
title=None,
tags=None,
sources=[text_file.name],
)
KEY = 'txt'
NAME = 'Generic TXT'

View file

@ -6,6 +6,7 @@
<a href="/admin/core/tag/">Tags</a> |
<a href="/admin/core/archiveresult/?o=-1">Log</a> &nbsp; &nbsp;
<a href="{% url 'Docs' %}" target="_blank" rel="noopener noreferrer">Docs</a> |
<a href="/api">API</a> |
<a href="{% url 'public-index' %}">Public</a> |
<a href="/admin/">Admin</a>
&nbsp; &nbsp;
@ -16,7 +17,7 @@
{% endblock %}
{% block userlinks %}
{% if user.has_usable_password %}
<a href="{% url 'admin:password_change' %}">Account</a> /
<a href="{% url 'admin:password_change' %}" title="Change your account password">Account</a> /
{% endif %}
<a href="{% url 'admin:logout' %}">{% trans 'Log out' %}</a>
{% endblock %}

View file

@ -62,12 +62,12 @@ COLOR_REGEX = re.compile(r'\[(?P<arg_1>\d+)(;(?P<arg_2>\d+)(;(?P<arg_3>\d+))?)?m
# https://mathiasbynens.be/demo/url-regex
URL_REGEX = re.compile(
r'(?=(' +
r'http[s]?://' + # start matching from allowed schemes
r'(?:[a-zA-Z]|[0-9]' + # followed by allowed alphanum characters
r'|[-_$@.&+!*\(\),]' + # or allowed symbols (keep hyphen first to match literal hyphen)
r'|[^\u0000-\u007F])+' + # or allowed unicode bytes
r'[^\]\[<>"\'\s]+' + # stop parsing at these symbols
r'(?=('
r'http[s]?://' # start matching from allowed schemes
r'(?:[a-zA-Z]|[0-9]' # followed by allowed alphanum characters
r'|[-_$@.&+!*\(\),]' # or allowed symbols (keep hyphen first to match literal hyphen)
r'|[^\u0000-\u007F])+' # or allowed unicode bytes
r'[^\]\[<>"\'\s]+' # stop parsing at these symbols
r'))',
re.IGNORECASE | re.UNICODE,
)
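Why the whole pattern is wrapped in (?=(...)): a zero-width lookahead with an inner capture group makes findall report overlapping matches, which is how URLs nested inside other URLs get caught. A sketch with a simplified stand-in pattern (not the real URL_REGEX):

import re

pat = re.compile(r'(?=(https?://[^\s\]\[<>"\']+))', re.IGNORECASE)
matches = pat.findall('https://a.example.com/one.html?url=http://example.com/page')
# -> ['https://a.example.com/one.html?url=http://example.com/page',
#     'http://example.com/page']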
@ -90,6 +90,11 @@ def fix_url_from_markdown(url_str: str) -> str:
helpful to fix URLs parsed from markdown e.g.
input: https://wikipedia.org/en/some_article_(Disambiguation).html?abc=def).somemoretext
result: https://wikipedia.org/en/some_article_(Disambiguation).html?abc=def
IMPORTANT ASSUMPTION: valid urls won't have unbalanced or incorrectly nested parentheses
e.g. this will fail if the user actually wants to ingest a url like 'https://example.com/some_wei)(rd_url'
in that case it will return 'https://example.com/some_wei' (truncated at the first unbalanced paren)
This assumption holds 99.9999% of the time, and for the rare edge case the user can use the url_list parser.
"""
trimmed_url = url_str
@ -353,7 +358,8 @@ def chrome_cleanup():
if IN_DOCKER and lexists("/home/archivebox/.config/chromium/SingletonLock"):
remove_file("/home/archivebox/.config/chromium/SingletonLock")
def ansi_to_html(text):
@enforce_types
def ansi_to_html(text: str) -> str:
"""
Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html
"""
@ -439,11 +445,14 @@ class ExtendedEncoder(pyjson.JSONEncoder):
### URL PARSING TESTS / ASSERTIONS
# they run at runtime because I like having them inline in this file,
# I like the peace of mind knowing it's enforced at runtime across all OS's (in case the regex engine ever has any weird locale-specific quirks),
# and these assertions are basically instant, so not a big performance cost to do it on startup
assert fix_url_from_markdown('/a(b)c).x(y)z') == '/a(b)c'
# Check that plain text regex URL parsing works as expected
# this is a last line of defense to make sure the URL_REGEX isn't
# misbehaving due to some OS-level or environment-level quirks (e.g. regex engine / cpython / locale differences)
# the consequences of bad URL parsing could be disastrous and lead to many
# incorrect/badly parsed links being added to the archive, so this is worth the cost of checking
assert fix_url_from_markdown('http://example.com/a(b)c).x(y)z') == 'http://example.com/a(b)c'
assert fix_url_from_markdown('https://wikipedia.org/en/some_article_(Disambiguation).html?abc=def).link(with)_trailingtext') == 'https://wikipedia.org/en/some_article_(Disambiguation).html?abc=def'
URL_REGEX_TESTS = [
@ -482,3 +491,50 @@ URL_REGEX_TESTS = [
for urls_str, expected_url_matches in URL_REGEX_TESTS:
url_matches = list(find_all_urls(urls_str))
assert url_matches == expected_url_matches, 'FAILED URL_REGEX CHECK!'
# More test cases
_test_url_strs = {
'example.com': 0,
'/example.com': 0,
'//example.com': 0,
':/example.com': 0,
'://example.com': 0,
'htt://example8.com': 0,
'/htt://example.com': 0,
'https://example': 1,
'https://localhost/2345': 1,
'https://localhost:1234/123': 1,
'://': 0,
'https://': 0,
'http://': 0,
'ftp://': 0,
'ftp://example.com': 0,
'https://example.com': 1,
'https://example.com/': 1,
'https://a.example.com': 1,
'https://a.example.com/': 1,
'https://a.example.com/what/is/happening.html': 1,
'https://a.example.com/what/ís/happening.html': 1,
'https://a.example.com/what/is/happening.html?what=1&2%20b#höw-about-this=1a': 1,
'https://a.example.com/what/is/happéning/?what=1&2%20b#how-aboüt-this=1a': 1,
'HTtpS://a.example.com/what/is/happening/?what=1&2%20b#how-about-this=1af&2f%20b': 1,
'https://example.com/?what=1#how-about-this=1&2%20baf': 1,
'https://example.com?what=1#how-about-this=1&2%20baf': 1,
'<test>http://example7.com</test>': 1,
'https://<test>': 0,
'https://[test]': 0,
'http://"test"': 0,
'http://\'test\'': 0,
'[https://example8.com/what/is/this.php?what=1]': 1,
'[and http://example9.com?what=1&other=3#and-thing=2]': 1,
'<what>https://example10.com#and-thing=2 "</about>': 1,
'abc<this["https://example11.com/what/is#and-thing=2?whoami=23&where=1"]that>def': 1,
'sdflkf[what](https://example12.com/who/what.php?whoami=1#whatami=2)?am=hi': 1,
'<or>http://examplehttp://15.badc</that>': 2,
'https://a.example.com/one.html?url=http://example.com/inside/of/another?=http://': 2,
'[https://a.example.com/one.html?url=http://example.com/inside/of/another?=](http://a.example.com)': 3,
}
for url_str, num_urls in _test_url_strs.items():
assert len(list(find_all_urls(url_str))) == num_urls, (
f'{url_str} does not contain {num_urls} urls')

View file

@ -18,7 +18,7 @@ which docker > /dev/null || exit 1
which jq > /dev/null || exit 1
# which pdm > /dev/null || exit 1
SUPPORTED_PLATFORMS="linux/amd64,linux/arm64,linux/arm/v7"
SUPPORTED_PLATFORMS="linux/amd64,linux/arm64"
TAG_NAME="${1:-$(git rev-parse --abbrev-ref HEAD)}"
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
@ -80,20 +80,20 @@ echo "[+] Building archivebox:$VERSION docker image..."
# docker build . --no-cache -t archivebox-dev \
# replace --load with --push to deploy
docker buildx build --platform "$SELECTED_PLATFORMS" --load . \
-t archivebox/archivebox \
# -t archivebox/archivebox \
-t archivebox/archivebox:$TAG_NAME \
-t archivebox/archivebox:$VERSION \
-t archivebox/archivebox:$SHORT_VERSION \
# -t archivebox/archivebox:$VERSION \
# -t archivebox/archivebox:$SHORT_VERSION \
-t archivebox/archivebox:$GIT_SHA \
-t archivebox/archivebox:latest \
-t nikisweeting/archivebox \
# -t archivebox/archivebox:latest \
# -t nikisweeting/archivebox \
-t nikisweeting/archivebox:$TAG_NAME \
-t nikisweeting/archivebox:$VERSION \
-t nikisweeting/archivebox:$SHORT_VERSION \
# -t nikisweeting/archivebox:$VERSION \
# -t nikisweeting/archivebox:$SHORT_VERSION \
-t nikisweeting/archivebox:$GIT_SHA \
-t nikisweeting/archivebox:latest \
# -t nikisweeting/archivebox:latest \
-t ghcr.io/archivebox/archivebox/archivebox:$TAG_NAME \
-t ghcr.io/archivebox/archivebox/archivebox:$VERSION \
-t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION \
# -t ghcr.io/archivebox/archivebox/archivebox:$VERSION \
# -t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION \
-t ghcr.io/archivebox/archivebox/archivebox:$GIT_SHA \
-t ghcr.io/archivebox/archivebox/archivebox:latest
# -t ghcr.io/archivebox/archivebox/archivebox:latest

View file

@ -18,6 +18,7 @@
# https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html
# set -o xtrace
# set -o nounset
shopt -s nullglob
set -o errexit
set -o errtrace
set -o pipefail

View file

@ -15,7 +15,7 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
source "$DIR/.venv/bin/activate"
echo "[*] Running flake8..."
cd archivebox
cd "$DIR/archivebox"
flake8 . && echo "√ No errors found."
echo

View file

@ -48,7 +48,7 @@ echo
echo "[+] Generating dev & prod requirements.txt & pdm.lock from pyproject.toml..."
pip install --upgrade pip setuptools
pdm self update
pdm self update >/dev/null 2>&1 || true
pdm venv create 3.12
echo
echo "pyproject.toml: archivebox $(grep 'version = ' pyproject.toml | awk '{print $3}' | jq -r)"
@ -73,7 +73,7 @@ cp ./pdm.dev.lock ./pip_dist/
cp ./requirements-dev.txt ./pip_dist/
echo
echo "[+]] Generating package-lock.json from package.json..."
echo "[+] Generating package-lock.json from package.json..."
npm install -g npm
echo
echo "package.json: archivebox $(jq -r '.version' package.json)"

View file

@ -27,9 +27,9 @@ if (which docker-compose > /dev/null && docker pull archivebox/archivebox:latest
if [ -f "./index.sqlite3" ]; then
mv -i ~/archivebox/* ~/archivebox/data/
fi
curl -fsSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/main/docker-compose.yml' > docker-compose.yml
curl -fsSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/stable/docker-compose.yml' > docker-compose.yml
mkdir -p ./etc
curl -fsSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/main/etc/sonic.cfg' > ./etc/sonic.cfg
curl -fsSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/stable/etc/sonic.cfg' > ./etc/sonic.cfg
docker compose run --rm archivebox init --setup
echo
echo "[+] Starting ArchiveBox server using: docker compose up -d..."

View file

@ -48,17 +48,17 @@ services:
# $ docker compose restart archivebox_scheduler
archivebox_scheduler:
image: archivebox/archivebox:latest
command: schedule --foreground --update --every=day
environment:
- TIMEOUT=120 # use a higher timeout than the main container to give slow tasks more time when retrying
# - PUID=502 # set to your host user's UID & GID if you encounter permissions issues
# - PGID=20
volumes:
- ./data:/data
# cpus: 2 # uncomment / edit these values to limit scheduler container resource consumption
# mem_limit: 2048m
# restart: always
image: archivebox/archivebox:latest
command: schedule --foreground --update --every=day
environment:
- TIMEOUT=120 # use a higher timeout than the main container to give slow tasks more time when retrying
# - PUID=502 # set to your host user's UID & GID if you encounter permissions issues
# - PGID=20
volumes:
- ./data:/data
# cpus: 2 # uncomment / edit these values to limit scheduler container resource consumption
# mem_limit: 2048m
# restart: always
### This runs the optional Sonic full-text search backend (much faster than default rg backend).
@ -72,7 +72,7 @@ services:
# not needed after first run / if you have already have ./etc/sonic.cfg present
dockerfile_inline: |
FROM quay.io/curl/curl:latest AS config_downloader
RUN curl -fsSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/main/etc/sonic.cfg' > /tmp/sonic.cfg
RUN curl -fsSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/stable/etc/sonic.cfg' > /tmp/sonic.cfg
FROM valeriansaliou/sonic:latest
COPY --from=config_downloader /tmp/sonic.cfg /etc/sonic.cfg
expose:
@ -99,7 +99,7 @@ services:
# restricted to access from localhost by default because it has no authentication
- 127.0.0.1:8080:8080
### Example: Put Nginx in front of the ArchiveBox server for SSL termination and static file serving.
# You can also any other ingress provider for SSL like Apache, Caddy, Traefik, Cloudflare Tunnels, etc.
@ -173,7 +173,7 @@ services:
### Example: run all your ArchiveBox traffic through a WireGuard VPN tunnel to avoid IP blocks.
# You can also use any other VPN that works at the docker IP level, e.g. Tailscale, OpenVPN, etc.
# wireguard:
# image: linuxserver/wireguard:latest
# network_mode: 'service:archivebox'

2
docs

@ -1 +1 @@
Subproject commit a1b69c51ba9b249c0b2a6efd141dbb792fc36ad2
Subproject commit f23abba9773b67ad9f2fd04d6f2e8e056dfa6521

50
package-lock.json generated
View file

@ -25,9 +25,9 @@
}
},
"node_modules/@babel/runtime-corejs2": {
"version": "7.24.4",
"resolved": "https://registry.npmjs.org/@babel/runtime-corejs2/-/runtime-corejs2-7.24.4.tgz",
"integrity": "sha512-ZCKqyUKt/Coimg+3Kafu43yNetgYnTXzNbEGAgxc81J5sI0qFNbQ613w7PNny+SmijAmGVroL0GDvx5rG/JI5Q==",
"version": "7.24.5",
"resolved": "https://registry.npmjs.org/@babel/runtime-corejs2/-/runtime-corejs2-7.24.5.tgz",
"integrity": "sha512-cC9jiO6s/IN+xwCHYy1AGrcFJ4bwgIwb8HX1KaoEpRsznLlO4x9eBP6AX7RIeMSWlQqEj2WHox637OS8cDq6Ew==",
"dependencies": {
"core-js": "^2.6.12",
"regenerator-runtime": "^0.14.0"
@ -203,9 +203,9 @@
"integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA=="
},
"node_modules/@types/node": {
"version": "20.12.7",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.7.tgz",
"integrity": "sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg==",
"version": "20.12.8",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.8.tgz",
"integrity": "sha512-NU0rJLJnshZWdE/097cdCBbyW1h4hEg0xpovcoAQYHl8dnEyp/NAOiE45pvc+Bd1Dt+2r94v2eGFpQJ4R7g+2w==",
"optional": true,
"dependencies": {
"undici-types": "~5.26.4"
@ -713,9 +713,9 @@
"integrity": "sha512-3VdM/SXBZX2omc9JF9nOPCtDaYQ67BGp5CoLpIQlO2KCAPETs8TcDHacF26jXadGbvUteZzRTeos2fhID5+ucQ=="
},
"node_modules/dompurify": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.1.0.tgz",
"integrity": "sha512-yoU4rhgPKCo+p5UrWWWNKiIq+ToGqmVVhk0PmMYBK4kRsR3/qhemNFL8f6CFmBd4gMwm3F4T7HBoydP5uY07fA=="
"version": "3.1.2",
"resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.1.2.tgz",
"integrity": "sha512-hLGGBI1tw5N8qTELr3blKjAML/LY4ANxksbS612UiJyDfyf/2D092Pvm+S7pmeTGJRqvlJkFzBoHBQKgQlOQVg=="
},
"node_modules/domutils": {
"version": "1.5.1",
@ -1655,6 +1655,26 @@
"node": ">=18"
}
},
"node_modules/puppeteer-core/node_modules/ws": {
"version": "8.16.0",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.16.0.tgz",
"integrity": "sha512-HS0c//TP7Ina87TfiPUz1rQzMhHrl/SG2guqRcTOIUYD2q8uhUdNHZYJUaQ8aTGPzCh+c6oawMKW35nFl1dxyQ==",
"engines": {
"node": ">=10.0.0"
},
"peerDependencies": {
"bufferutil": "^4.0.1",
"utf-8-validate": ">=5.0.2"
},
"peerDependenciesMeta": {
"bufferutil": {
"optional": true
},
"utf-8-validate": {
"optional": true
}
}
},
"node_modules/qs": {
"version": "6.5.3",
"resolved": "https://registry.npmjs.org/qs/-/qs-6.5.3.tgz",
@ -2071,9 +2091,9 @@
}
},
"node_modules/tough-cookie": {
"version": "4.1.3",
"resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-4.1.3.tgz",
"integrity": "sha512-aX/y5pVRkfRnfmuX+OdbSdXvPe6ieKX/G2s7e98f4poJHnqH3281gDPm/metm6E/WRamfx7WC4HUqkWHfQHprw==",
"version": "4.1.4",
"resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-4.1.4.tgz",
"integrity": "sha512-Loo5UUvLD9ScZ6jh8beX1T6sO1w2/MpCRpEP7V280GKMVUQ0Jzar2U3UJPsrdbziLEMMhu3Ujnq//rhiFuIeag==",
"dependencies": {
"psl": "^1.1.33",
"punycode": "^2.1.1",
@ -2276,9 +2296,9 @@
"integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ=="
},
"node_modules/ws": {
"version": "8.16.0",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.16.0.tgz",
"integrity": "sha512-HS0c//TP7Ina87TfiPUz1rQzMhHrl/SG2guqRcTOIUYD2q8uhUdNHZYJUaQ8aTGPzCh+c6oawMKW35nFl1dxyQ==",
"version": "8.17.0",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.17.0.tgz",
"integrity": "sha512-uJq6108EgZMAl20KagGkzCKfMEjxmKvZHG7Tlq0Z6nOky7YF7aq4mOx6xK8TJ/i1LeK4Qus7INktacctDgY8Ow==",
"engines": {
"node": ">=10.0.0"
},

1128
pdm.lock

File diff suppressed because it is too large

View file

@ -12,32 +12,31 @@ readme = "README.md"
# pdm install
# pdm update --unconstrained
dependencies = [
# Last Bumped: 2024-04-25
# Base Framework and Language Dependencies
"setuptools>=69.5.1",
"django>=4.2.0,<5.0",
"django>=5.0.4,<6.0",
"django-ninja>=1.1.0",
"django-extensions>=3.2.3",
"mypy-extensions>=1.0.0",
# Python Helper Libraries
"requests>=2.31.0",
"dateparser>=1.0.0",
"feedparser>=6.0.11",
"w3lib>=1.22.0",
"w3lib>=2.1.2",
# Feature-Specific Dependencies
"python-crontab>=2.5.1", # for: archivebox schedule
"croniter>=0.3.34", # for: archivebox schedule
"ipython>5.0.0", # for: archivebox shell
"python-crontab>=3.0.0", # for: archivebox schedule
"croniter>=2.0.5", # for: archivebox schedule
"ipython>=8.23.0", # for: archivebox shell
# Extractor Dependencies
"yt-dlp>=2024.4.9", # for: media
"playwright>=1.43.0; platform_machine != 'armv7l'", # WARNING: playwright doesn't have any sdist, causes trouble on build systems that refuse to install wheel-only packages
# "playwright>=1.43.0; platform_machine != 'armv7l'", # WARNING: playwright doesn't have any sdist, causes trouble on build systems that refuse to install wheel-only packages
# TODO: add more extractors
# - gallery-dl
# - scihubdl
# - See Github issues for more...
"django-signal-webhooks>=0.3.0",
"django-admin-data-views>=0.3.1",
]
homepage = "https://github.com/ArchiveBox/ArchiveBox"
@ -59,9 +58,6 @@ classifiers = [
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
@ -100,10 +96,10 @@ ldap = [
# pdm update --dev --unconstrained
[tool.pdm.dev-dependencies]
build = [
# "pdm", # usually installed by apt/brew, dont double-install with pip
"setuptools>=69.5.1",
"pip",
"wheel",
"pdm",
"homebrew-pypi-poet>=0.10.0", # for: generating archivebox.rb brewfile list of python packages
]
docs = [
@ -115,10 +111,11 @@ debug = [
"django-debug-toolbar",
"djdt_flamegraph",
"ipdb",
"requests-tracker>=0.3.3",
]
test = [
"pdm[pytest]",
"pytest",
"bottle",
]
lint = [
"flake8",
@ -126,6 +123,12 @@ lint = [
"django-stubs",
]
[tool.pdm.scripts]
lint = "./bin/lint.sh"
test = "./bin/test.sh"
# all = {composite = ["lint mypackage/", "test -v tests/"]}
[build-system]
requires = ["pdm-backend"]
build-backend = "pdm.backend"
@ -134,11 +137,6 @@ build-backend = "pdm.backend"
archivebox = "archivebox.cli:main"
[tool.pdm.scripts]
lint = "./bin/lint.sh"
test = "./bin/test.sh"
# all = {composite = ["lint mypackage/", "test -v tests/"]}
[tool.pytest.ini_options]
testpaths = [ "tests" ]
@ -154,6 +152,8 @@ explicit_package_bases = true
# exclude = "pdm/(pep582/|models/in_process/.+\\.py)"
plugins = ["mypy_django_plugin.main"]
[tool.django-stubs]
django_settings_module = "core.settings"
[project.urls]

View file

@ -2,54 +2,59 @@
# Please do not edit it manually.
annotated-types==0.6.0
anyio==4.3.0
asgiref==3.8.1
asttokens==2.4.1
brotli==1.1.0; implementation_name == "cpython"
brotlicffi==1.1.0.0; implementation_name != "cpython"
certifi==2024.2.2
cffi==1.16.0; implementation_name != "cpython"
cffi==1.16.0; platform_python_implementation != "PyPy" or implementation_name != "cpython"
charset-normalizer==3.3.2
colorama==0.4.6; sys_platform == "win32"
croniter==2.0.5
cryptography==42.0.7
dateparser==1.2.0
decorator==5.1.1
django==4.2.11
django==5.0.4
django-auth-ldap==4.8.0
django-extensions==3.2.3
django-ninja==1.1.0
django-settings-holder==0.1.2
django-signal-webhooks==0.3.0
exceptiongroup==1.2.1; python_version < "3.11"
executing==2.0.1
feedparser==6.0.11
greenlet==3.0.3; platform_machine != "armv7l"
h11==0.14.0
httpcore==1.0.5
httpx==0.27.0
idna==3.7
ipython==8.23.0
ipython==8.24.0
jedi==0.19.1
matplotlib-inline==0.1.7
mutagen==1.47.0
mypy-extensions==1.0.0
parso==0.8.4
pexpect==4.9.0; sys_platform != "win32" and sys_platform != "emscripten"
playwright==1.43.0; platform_machine != "armv7l"
prompt-toolkit==3.0.43
ptyprocess==0.7.0; sys_platform != "win32" and sys_platform != "emscripten"
pure-eval==0.2.2
pyasn1==0.6.0
pyasn1-modules==0.4.0
pycparser==2.22; implementation_name != "cpython"
pycparser==2.22; platform_python_implementation != "PyPy" or implementation_name != "cpython"
pycryptodomex==3.20.0
pydantic==2.7.1
pydantic-core==2.18.2
pyee==11.1.0; platform_machine != "armv7l"
pygments==2.17.2
pygments==2.18.0
python-crontab==3.0.0
python-dateutil==2.9.0.post0
python-ldap==3.4.4
pytz==2024.1
regex==2024.4.16
regex==2024.4.28
requests==2.31.0
setuptools==69.5.1
sgmllib3k==1.0.0
six==1.16.0
sniffio==1.3.1
sonic-client==1.0.0
sqlparse==0.5.0
stack-data==0.6.3