diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 25aa5316..9ff04c51 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -75,7 +75,7 @@ jobs: tags: ${{ steps.docker_meta.outputs.tags }} cache-from: type=local,src=/tmp/.buildx-cache cache-to: type=local,dest=/tmp/.buildx-cache - platforms: linux/amd64,linux/386,linux/arm64,linux/arm/v7 + platforms: linux/amd64,linux/arm64,linux/arm/v7 - name: Image digest run: echo ${{ steps.docker_build.outputs.digest }} diff --git a/Dockerfile b/Dockerfile index 507ee6ac..8cf2da30 100644 --- a/Dockerfile +++ b/Dockerfile @@ -63,6 +63,7 @@ RUN curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add - && apt-get update -qq \ && apt-get install -qq -y --no-install-recommends \ nodejs \ + # && npm install -g npm \ && rm -rf /var/lib/apt/lists/* # Install Node dependencies @@ -82,6 +83,7 @@ ADD ./pip_dist/archivebox.egg-info/requires.txt "$CODE_DIR/pip_dist/archivebox.e RUN apt-get update -qq \ && apt-get install -qq -y --no-install-recommends \ build-essential python-dev python3-dev \ + # && pip install --upgrade pip \ && grep -B 1000 -E '^$' "$CODE_DIR/pip_dist/archivebox.egg-info/requires.txt" | pip install --quiet -r /dev/stdin \ && pip install --quiet "sonic-client==0.0.5" \ && apt-get purge -y build-essential python-dev python3-dev \ diff --git a/MANIFEST.in b/MANIFEST.in index c9ae1535..f33f160f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,3 +2,5 @@ graft archivebox global-exclude .DS_Store global-exclude __pycache__ global-exclude *.pyc + +prune tests/ diff --git a/README.md b/README.md index a83922a3..76b51be3 100644 --- a/README.md +++ b/README.md @@ -82,8 +82,9 @@ archivebox help . . . . . . . . . . . . . . . . . . . . . . . . . . . .

cli init screenshot +cli init screenshot server snapshot admin screenshot -server snapshot details page screenshot +server snapshot details page screenshot

grassgrass @@ -266,10 +267,7 @@ No matter which install method you choose, they all roughly follow this 3-step p
- - - -

+
. . . . . . . . . . . . . . . . . . . . . . . . . . . .

DEMO: https://archivebox.zervice.io
@@ -327,7 +325,15 @@ All of ArchiveBox's state (including the index, snapshot data, and config file) The on-disk layout is optimized to be easy to browse by hand and durable long-term. The main index is a standard sqlite3 database (it can also be exported as static JSON/HTML), and the archive snapshots are organized by date-added timestamp in the `archive/` subfolder. Each snapshot subfolder includes a static JSON and HTML index describing its contents, and the snapshot extractor outputs are plain files within the folder (e.g. `media/example.mp4`, `git/somerepo.git`, `static/someimage.png`, etc.) ```bash - ls ./archive// +# to browse your index statically without running the archivebox server, run: +archivebox list --html --with-headers > index.html +archivebox list --json --with-headers > index.json + +# then open the static index in a browser +open index.html + +# or browse the snapshots via filesystem directly +ls ./archive// ``` - **Index:** `index.html` & `index.json` HTML and JSON index files containing metadata and details @@ -346,6 +352,12 @@ The on-disk layout is optimized to be easy to browse by hand and durable long-te It does everything out-of-the-box by default, but you can disable or tweak [individual archive methods](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration) via environment variables or config file. +```bash +archivebox config --set SAVE_ARCHIVE_DOT_ORG=False +archivebox config --set YOUTUBEDL_ARGS='--max-filesize=500m' +archivebox config --help +``` +
lego graphic
@@ -445,7 +457,7 @@ archivebox add 'https://example.com#2020-10-25' archivebox server list -archivebox server detail +archivebox server detail diff --git a/archivebox/config.py b/archivebox/config.py index 349817ec..3d48344f 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -1079,6 +1079,6 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, if check_db: sql_index_path = Path(output_dir) / SQL_INDEX_FILENAME assert sql_index_path.exists(), ( - f'No database file {SQL_INDEX_FILENAME} found in OUTPUT_DIR: {config["OUTPUT_DIR"]}') + f'No database file {SQL_INDEX_FILENAME} found in: {config["OUTPUT_DIR"]} (Are you in an ArchiveBox collection directory?)') except KeyboardInterrupt: raise SystemExit(2) diff --git a/archivebox/core/migrations/0007_archiveresult.py b/archivebox/core/migrations/0007_archiveresult.py index ec48d3ff..29b269f6 100644 --- a/archivebox/core/migrations/0007_archiveresult.py +++ b/archivebox/core/migrations/0007_archiveresult.py @@ -36,8 +36,25 @@ def forwards_func(apps, schema_editor): for extractor in history: for result in history[extractor]: - ArchiveResult.objects.create(extractor=extractor, snapshot=snapshot, cmd=result["cmd"], cmd_version=result["cmd_version"] or 'unknown', - start_ts=result["start_ts"], end_ts=result["end_ts"], status=result["status"], pwd=result["pwd"], output=result["output"]) + try: + ArchiveResult.objects.create( + extractor=extractor, + snapshot=snapshot, + pwd=result["pwd"], + cmd=result.get("cmd") or [], + cmd_version=result.get("cmd_version") or 'unknown', + start_ts=result["start_ts"], + end_ts=result["end_ts"], + status=result["status"], + output=result.get("output") or 'null', + ) + except Exception as e: + print( + ' ! 
Skipping import due to missing/invalid index.json:', + out_dir, + e, + '(open an issue with this index.json for help)', + ) def verify_json_index_integrity(snapshot): diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py index 7501da3a..1ca4e801 100644 --- a/archivebox/index/schema.py +++ b/archivebox/index/schema.py @@ -55,11 +55,11 @@ class ArchiveResult: assert isinstance(self.end_ts, datetime) assert isinstance(self.cmd, list) assert all(isinstance(arg, str) and arg for arg in self.cmd) - assert self.pwd is None or isinstance(self.pwd, str) and self.pwd - assert self.cmd_version is None or isinstance(self.cmd_version, str) and self.cmd_version + + # TODO: replace emptystrings in these three with None / remove them from the DB + assert self.pwd is None or isinstance(self.pwd, str) + assert self.cmd_version is None or isinstance(self.cmd_version, str) assert self.output is None or isinstance(self.output, (str, Exception)) - if isinstance(self.output, str): - assert self.output @classmethod def guess_ts(_cls, dict_info): diff --git a/bin/build_deb.sh b/bin/build_deb.sh index b9279369..8c5c7fcf 100755 --- a/bin/build_deb.sh +++ b/bin/build_deb.sh @@ -10,14 +10,6 @@ set -o nounset set -o pipefail IFS=$'\n' -REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )" - -if [[ -f "$REPO_DIR/.venv/bin/activate" ]]; then - source "$REPO_DIR/.venv/bin/activate" -else - echo "[!] 
Warning: No virtualenv presesnt in $REPO_DIR.venv" -fi -cd "$REPO_DIR" CURRENT_PLAFORM="$(uname)" REQUIRED_PLATFORM="Linux" @@ -26,30 +18,27 @@ if [[ "$CURRENT_PLAFORM" != "$REQUIRED_PLATFORM" ]]; then exit 0 fi -VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")" -DEBIAN_VERSION="1" -PGP_KEY_ID="7D5695D3B618872647861D51C38137A7C1675988" -# make sure you have this in ~/.dput.cf: -# [archivebox-ppa] -# fqdn: ppa.launchpad.net -# method: ftp -# incoming: ~archivebox/ubuntu/archivebox/ -# login: anonymous -# allow_unsigned_uploads: 0 +REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )" +VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")" +DEBIAN_VERSION="${DEBIAN_VERSION:-1}" +cd "$REPO_DIR" + + +if [[ -f "$REPO_DIR/.venv/bin/activate" ]]; then + source "$REPO_DIR/.venv/bin/activate" +else + echo "[!] Warning: No virtualenv presesnt in $REPO_DIR.venv" +fi # cleanup build artifacts rm -Rf build deb_dist dist archivebox-*.tar.gz -# make sure the stdeb.cfg file is up-to-date with all the dependencies # build source and binary packages +# make sure the stdeb.cfg file is up-to-date with all the dependencies python3 setup.py --command-packages=stdeb.command \ sdist_dsc --debian-version=$DEBIAN_VERSION \ bdist_deb -# sign the build with your PGP key ID -debsign -k "$PGP_KEY_ID" "deb_dist/archivebox_${VERSION}-${DEBIAN_VERSION}_source.changes" - -# push the build to launchpad ppa -# dput archivebox "deb_dist/archivebox_${VERSION}-${DEBIAN_VERSION}_source.changes" +# should output deb_dist/archivebox_0.5.4-1.{deb,changes,buildinfo,tar.gz} diff --git a/bin/release_deb.sh b/bin/release_deb.sh index dc1bff35..a470c4f3 100755 --- a/bin/release_deb.sh +++ b/bin/release_deb.sh @@ -10,11 +10,41 @@ set -o nounset set -o pipefail IFS=$'\n' + +CURRENT_PLAFORM="$(uname)" +REQUIRED_PLATFORM="Linux" +if [[ "$CURRENT_PLAFORM" != "$REQUIRED_PLATFORM" ]]; then + echo "[!] 
Skipping the Debian package build on $CURRENT_PLAFORM (it can only be run on $REQUIRED_PLATFORM)." + exit 0 +fi + + REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )" VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")" -SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')" +DEBIAN_VERSION="${DEBIAN_VERSION:-1}" cd "$REPO_DIR" +echo "[+] Loading PGP keys from env vars and filesystem..." +# https://github.com/ArchiveBox/debian-archivebox/settings/secrets/actions +PGP_KEY_ID="${PGP_KEY_ID:-BC2D21B0D84E16C437300B8652423FBED1586F45}" +[[ "${PGP_PUBLIC_KEY:-}" ]] && echo "$PGP_PUBLIC_KEY" > /tmp/archivebox_gpg.key.pub +[[ "${PGP_PRIVATE_KEY:-}" ]] && echo "$PGP_PRIVATE_KEY" > /tmp/archivebox_gpg.key +gpg --import /tmp/archivebox_gpg.key.pub || true +gpg --import --allow-secret-key-import /tmp/archivebox_gpg.key || true +echo "$PGP_KEY_ID:6:" | gpg --import-ownertrust || true + +echo "[*] Signing build and changelog with PGP..." +debsign --re-sign -k "$PGP_KEY_ID" "deb_dist/archivebox_${VERSION}-${DEBIAN_VERSION}_source.changes" + +# make sure you have this in ~/.dput.cf: +# [archivebox-ppa] +# fqdn: ppa.launchpad.net +# method: ftp +# incoming: ~archivebox/ubuntu/archivebox/ +# login: anonymous +# allow_unsigned_uploads: 0 + + echo "[^] Uploading to launchpad.net" -dput archivebox "deb_dist/archivebox_${VERSION}-1_source.changes" +dput -f archivebox "deb_dist/archivebox_${VERSION}-${DEBIAN_VERSION}_source.changes" diff --git a/bin/release_docker.sh b/bin/release_docker.sh index 344a456d..80353808 100755 --- a/bin/release_docker.sh +++ b/bin/release_docker.sh @@ -19,6 +19,7 @@ cd "$REPO_DIR" echo "[^] Uploading docker image" # docker login --username=nikisweeting # docker login docker.pkg.github.com --username=pirate +docker push archivebox/archivebox:$VERSION archivebox/archivebox:$SHORT_VERSION archivebox/archivebox:latest docker push docker.io/nikisweeting/archivebox docker push 
docker.io/archivebox/archivebox docker push docker.pkg.github.com/archivebox/archivebox/archivebox diff --git a/package.json b/package.json index 29d73765..02247f7d 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "archivebox", - "version": "0.5.4", + "version": "0.5.6", "description": "ArchiveBox: The self-hosted internet archive", "author": "Nick Sweeting ", "license": "MIT", diff --git a/setup.py b/setup.py index 692e5850..962db8d8 100755 --- a/setup.py +++ b/setup.py @@ -33,11 +33,10 @@ VERSION = json.loads((PACKAGE_DIR / "package.json").read_text().strip())['versio # print('>', sys.executable, *sys.argv) -class CustomTest(test): +class DisabledTestCommand(test): def run(self): # setup.py test is deprecated, disable it here by force so stdeb doesnt run it - #super().run() - pass + print('Use the ./bin/test.sh script to run tests, not setup.py test.') setuptools.setup( @@ -129,6 +128,6 @@ setuptools.setup( "Typing :: Typed", ], cmdclass={ - "test": CustomTest, + "test": DisabledTestCommand, }, ) diff --git a/stdeb.cfg b/stdeb.cfg index a07147e2..cd191a42 100644 --- a/stdeb.cfg +++ b/stdeb.cfg @@ -7,3 +7,4 @@ Suite3: focal Build-Depends: dh-python, python3-pip, python3-setuptools, python3-wheel, python3-stdeb Depends3: nodejs, chromium-browser, wget, curl, git, ffmpeg, youtube-dl, python3-atomicwrites, python3-croniter, python3-crontab, python3-dateparser, python3-django, python3-django-extensions, python3-django-jsonfield, python3-mypy-extensions, python3-requests, python3-w3lib, ripgrep XS-Python-Version: >= 3.7 +Setup-Env-Vars: DEB_BUILD_OPTIONS=nocheck