1
0
Fork 0
mirror of synced 2024-06-17 18:04:33 +12:00

Merge pull request #552 from ArchiveBox/v0.5.0

This commit is contained in:
Nick Sweeting 2021-01-06 20:02:21 +02:00 committed by GitHub
commit c322bc59b6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
106 changed files with 2141 additions and 1854 deletions

View file

@ -3,6 +3,8 @@
*.pyc
__pycache__/
.mypy_cache/
.pytest_cache/
.github/
venv/
.venv/
@ -10,6 +12,10 @@ venv/
build/
dist/
pip_dist/
!pip_dist/archivebox.egg-info/requires.txt
brew_dist/
assets/
data/
output/

76
.github/workflows/debian.yml vendored Normal file
View file

@ -0,0 +1,76 @@
name: Build Debian package
on:
workflow_dispatch:
push:
env:
DEB_BUILD_OPTIONS: nocheck
jobs:
build:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
with:
submodules: true
fetch-depth: 1
- name: Install packaging dependencies
run: |
sudo apt install -y \
python3 python3-dev python3-pip python3-venv python3-all \
dh-python debhelper devscripts dput software-properties-common \
python3-distutils python3-setuptools python3-wheel python3-stdeb
- name: Build Debian/Apt sdist_dsc
run: |
rm -Rf deb_dist/*
python3 setup.py --command-packages=stdeb.command sdist_dsc
- name: Build Debian/Apt bdist_deb
run: |
python3 setup.py --command-packages=stdeb.command bdist_deb
- name: Install archivebox from deb
run: |
cd deb_dist/
sudo apt install ./archivebox*.deb
- name: Check ArchiveBox version
run: |
# must create dir needed for snaps to run as non-root on github actions
sudo mkdir -p /run/user/1001 && sudo chmod -R 777 /run/user/1001
mkdir "${{ github.workspace }}/data" && cd "${{ github.workspace }}/data"
archivebox init
archivebox config --set SAVE_READABILITY=False
archivebox config --set SAVE_MERCURY=False
archivebox config --set SAVE_SINGLEFILE=False
archivebox --version
- name: Add some links to test
run: |
cd "${{ github.workspace }}/data"
archivebox add 'https://example.com'
archivebox status
# - name: Commit built package
# run: |
# cd deb_dist/
# git config --local user.email "action@github.com"
# git config --local user.name "GitHub Action"
# git commit -m "Debian package autobuild" -a
# - name: Push build to Github
# uses: ad-m/github-push-action@master
# with:
# github_token: ${{ secrets.GITHUB_TOKEN }}
# repository: ArchiveBox/debian-archivebox
# branch: ${{ github.ref }}
# directory: deb_dist
# - name: Push build to Launchpad PPA
# run: |
# debsign -k "$PGP_KEY_ID" "deb_dist/archivebox_${VERSION}-${DEBIAN_VERSION}_source.changes"
# dput archivebox "deb_dist/archivebox_${VERSION}-${DEBIAN_VERSION}_source.changes"

View file

@ -1,4 +1,4 @@
name: Docker Push
name: Build Docker image
on:
push:
@ -8,6 +8,10 @@ on:
types:
- created
env:
DOCKER_IMAGE: archivebox-ci
jobs:
buildx:
runs-on: ubuntu-latest
@ -17,20 +21,29 @@ jobs:
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Checkout
uses: actions/checkout@v2
with:
submodules: true
fetch-depth: 1
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v1
with:
version: latest
install: true
- name: Builder instance name
run: echo ${{ steps.buildx.outputs.name }}
- name: Available platforms
run: echo ${{ steps.buildx.outputs.platforms }}
- name: Cache Docker layers
uses: actions/cache@v2
with:
@ -38,6 +51,7 @@ jobs:
key: ${{ runner.os }}-buildx-${{ github.sha }}
restore-keys: |
${{ runner.os }}-buildx-
- name: Build and push
id: docker_build
uses: docker/build-push-action@v2
@ -54,5 +68,6 @@ jobs:
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
platforms: linux/amd64,linux/arm64,linux/arm/v7
- name: Image digest
run: echo ${{ steps.docker_build.outputs.digest }}

50
.github/workflows/homebrew.yml vendored Normal file
View file

@ -0,0 +1,50 @@
name: Build Homebrew package
on:
workflow_dispatch:
push:
jobs:
build:
runs-on: macos-latest
steps:
- uses: actions/checkout@v2
with:
submodules: true
fetch-depth: 1
# TODO: modify archivebox.rb to update src url, hashes, and dependencies
- name: Build Homebrew Bottle
run: |
pip3 install --upgrade pip setuptools wheel
cd brew_dist/
brew install --build-bottle ./archivebox.rb
# brew bottle archivebox
- name: Add some links to test
run: |
mkdir data && cd data
archivebox init
archivebox add 'https://example.com'
archivebox version
archivebox status
# - name: Commit built package
# run: |
# cd brew_dist/
# git config --local user.email "action@github.com"
# git config --local user.name "GitHub Action"
# git commit -m "Homebrew package autobuild" -a
# - name: Push build to Github
# uses: ad-m/github-push-action@master
# with:
# github_token: ${{ secrets.GITHUB_TOKEN }}
# repository: ArchiveBox/homebrew-archivebox
# branch: ${{ github.ref }}
# directory: brew_dist
# TODO: push bottle homebrew core PR with latest changes

34
.github/workflows/lint.yml vendored Normal file
View file

@ -0,0 +1,34 @@
name: Run linters
on:
workflow_dispatch:
push:
env:
MAX_LINE_LENGTH: 110
jobs:
lint:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
with:
submodules: true
fetch-depth: 1
- name: Set up Python
uses: actions/setup-python@v1
with:
python-version: 3.9
architecture: x64
- name: Install flake8
run: |
pip install flake8
- name: Lint with flake8
run: |
# one pass for show-stopper syntax errors or undefined names
flake8 archivebox --count --show-source --statistics
# one pass for small stylistic things
flake8 archivebox --count --max-line-length="$MAX_LINE_LENGTH" --statistics

61
.github/workflows/pip.yml vendored Normal file
View file

@ -0,0 +1,61 @@
name: Build pip package
on:
workflow_dispatch:
push:
jobs:
build:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
with:
submodules: true
fetch-depth: 1
- name: Set up Python
uses: actions/setup-python@v1
with:
python-version: 3.9
architecture: x64
- name: Build Python Package
run: |
pip3 install --upgrade pip setuptools wheel
rm -Rf pip_dist/*.whl
python3 setup.py \
sdist --dist-dir=./pip_dist \
bdist_wheel --dist-dir=./pip_dist \
egg_info --egg-base=./pip_dist
pip install pip_dist/archivebox-*.whl
- name: Add some links to test
run: |
mkdir data && cd data
archivebox init
archivebox add 'https://example.com'
archivebox version
archivebox status
# - name: Commit built package
# run: |
# cd pip_dist/
# git config --local user.email "action@github.com"
# git config --local user.name "GitHub Action"
# git commit -m "Pip package autobuild" -a
# - name: Push build to Github
# uses: ad-m/github-push-action@master
# with:
# github_token: ${{ secrets.GITHUB_TOKEN }}
# repository: ArchiveBox/pip-archivebox
# branch: ${{ github.ref }}
# directory: pip_dist
# - name: Push build to PyPI
# run: |
# cd pip_dist/
# python3 -m twine upload --repository testpypi pip_dist/*.{whl,tar.gz}
# python3 -m twine upload --repository pypi pip_dist/*.{whl,tar.gz}

View file

@ -1,44 +1,25 @@
name: 'Lint, Test, and Build'
name: Run tests
on: [push]
env:
MAX_LINE_LENGTH: 110
DOCKER_IMAGE: archivebox-ci
PYTHONIOENCODING: utf-8
PYTHONLEGACYWINDOWSSTDIO: utf-8
USE_COLOR: False
jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v1
with:
python-version: 3.8
architecture: x64
- name: Install flake8
run: |
pip install flake8
- name: Lint with flake8
run: |
# one pass for show-stopper syntax errors or undefined names
flake8 archivebox --count --show-source --statistics
# one pass for small stylistic things
flake8 archivebox --count --max-line-length="$MAX_LINE_LENGTH" --statistics
test:
python_tests:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
python: [3.7, 3.8]
os: [ubuntu-20.04, macos-latest, windows-latest]
python: [3.7]
steps:
- uses: actions/checkout@v2
with:
submodules: true
fetch-depth: 1
### Setup Python & JS Languages
@ -70,8 +51,9 @@ jobs:
- name: Install pip dependencies
run: |
python -m pip install --upgrade pip setuptools wheel pytest bottle
./bin/build_pip.sh
python -m pip install .
python -m pip install pytest bottle
- name: Get npm cache dir
id: npm-cache
@ -98,19 +80,25 @@ jobs:
- name: Directory listing for debugging
run: |
pwd
ls -a ./
ls
- name: Archivebox version
run: |
archivebox version
- name: Test built package with pytest
# TODO: remove this exception for windows once we get tests passing on that platform
if: ${{ !contains(matrix.os, 'windows') }}
run: |
python -m pytest -s
python -m pytest -s --ignore=archivebox/vendor
docker-test:
docker_tests:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
with:
submodules: true
fetch-depth: 1
# TODO: as of 2020-11 this helper layer broke, upgrade and re-enable this once it's usable again
@ -122,8 +110,8 @@ jobs:
- name: Init data dir
run: |
mkdir data
docker run -v "$PWD"/data:/data "$DOCKER_IMAGE" init
mkdir "${{ github.workspace }}/data"
docker run -v "${{ github.workspace }}/data":/data "$DOCKER_IMAGE" init
- name: Run test server
run: |
@ -149,7 +137,7 @@ jobs:
docker-compose up -d
sleep 5
curl --silent --location 'http://127.0.0.1:8000' | grep 'ArchiveBox'
curl --silent --location 'http://127.0.0.1:8000/static/admin/js/jquery.init.js' | grep 'django.jQuery'
curl --silent --location 'http://127.0.0.1:8000/static/admin/js/jquery.init.js' | grep 'window.django'
- name: Check added urls show up in index
run: |

3
.gitignore vendored
View file

@ -3,6 +3,7 @@
*.pyc
__pycache__/
.mypy_cache/
tests/out/
# Python and Node dependencies
venv/
@ -11,9 +12,9 @@ venv/
node_modules/
# Packaging artifacts
archivebox.egg-info
archivebox-*.tar.gz
build/
deb_dist/
dist/
# Data folders

26
.gitmodules vendored
View file

@ -1,3 +1,25 @@
[submodule "docs"]
path = docs
url = https://github.com/pirate/ArchiveBox.wiki.git
path = docs
url = https://github.com/ArchiveBox/ArchiveBox.wiki.git
[submodule "deb_dist"]
path = deb_dist
url = https://github.com/ArchiveBox/debian-archivebox.git
[submodule "brew_dist"]
path = brew_dist
url = https://github.com/ArchiveBox/homebrew-archivebox.git
[submodule "pip_dist"]
path = pip_dist
url = https://github.com/ArchiveBox/pip-archivebox.git
[submodule "docker"]
path = docker
url = https://github.com/ArchiveBox/docker-archivebox.git
[submodule "archivebox/vendor/base32-crockford"]
path = archivebox/vendor/base32-crockford
url = https://github.com/jbittel/base32-crockford
[submodule "archivebox/vendor/pocket"]
path = archivebox/vendor/pocket
url = https://github.com/tapanpandita/pocket
[submodule "archivebox/vendor/django-taggit"]
path = archivebox/vendor/django-taggit
url = https://github.com/jazzband/django-taggit

View file

@ -7,7 +7,7 @@
# docker run -v "$PWD/data":/data -it archivebox manage createsuperuser
# docker run -v "$PWD/data":/data -p 8000:8000 archivebox server
FROM python:3.8-slim-buster
FROM python:3.9-slim-buster
LABEL name="archivebox" \
maintainer="Nick Sweeting <archivebox-docker@sweeting.me>" \
@ -46,13 +46,20 @@ RUN apt-get update -qq \
# Install apt dependencies
RUN apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \
wget curl chromium git ffmpeg youtube-dl \
wget curl chromium git ffmpeg youtube-dl ripgrep \
fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
&& rm -rf /var/lib/apt/lists/*
# Install apt development dependencies
# RUN apt-get install -qq \
# && apt-get install -qq -y --no-install-recommends \
# python3 python3-dev python3-pip python3-venv python3-all \
# dh-python debhelper devscripts dput software-properties-common \
# python3-distutils python3-setuptools python3-wheel python3-stdeb
# Install Node environment
RUN curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add - \
&& echo 'deb https://deb.nodesource.com/node_14.x buster main' >> /etc/apt/sources.list \
&& echo 'deb https://deb.nodesource.com/node_15.x buster main' >> /etc/apt/sources.list \
&& apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \
nodejs \
@ -62,7 +69,6 @@ RUN curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add -
WORKDIR "$NODE_DIR"
ENV PATH="${PATH}:$NODE_DIR/node_modules/.bin" \
npm_config_loglevel=error
RUN npm install -g npm
ADD ./package.json ./package.json
ADD ./package-lock.json ./package-lock.json
RUN npm ci
@ -72,16 +78,17 @@ WORKDIR "$CODE_DIR"
ENV PATH="${PATH}:$VENV_PATH/bin"
RUN python -m venv --clear --symlinks "$VENV_PATH" \
&& pip install --upgrade --quiet pip setuptools
ADD ./archivebox.egg-info/requires.txt "$CODE_DIR/archivebox.egg-info/requires.txt"
ADD ./pip_dist/archivebox.egg-info/requires.txt "$CODE_DIR/pip_dist/archivebox.egg-info/requires.txt"
RUN apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \
build-essential python-dev python3-dev \
&& grep -B 1000 -E '^$' "$CODE_DIR/archivebox.egg-info/requires.txt" | pip install --quiet -r /dev/stdin \
&& grep -B 1000 -E '^$' "$CODE_DIR/pip_dist/archivebox.egg-info/requires.txt" | pip install --quiet -r /dev/stdin \
&& pip install --quiet "sonic-client==0.0.5" \
&& apt-get purge -y build-essential python-dev python3-dev \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
# Install ArchiveBox Python package
# Install ArchiveBox Python package and its dependencies
WORKDIR "$CODE_DIR"
ADD . "$CODE_DIR"
RUN pip install -e .
@ -99,7 +106,8 @@ ENV IN_DOCKER=True \
MERCURY_BINARY="$NODE_DIR/node_modules/.bin/mercury-parser"
# Print version for nice docker finish summary
RUN archivebox version
# RUN archivebox version
RUN /app/bin/docker_entrypoint.sh archivebox version
# Open up the interfaces to the outside world
VOLUME "$DATA_DIR"

321
README.md
View file

@ -26,62 +26,175 @@
<hr/>
</div>
ArchiveBox is a powerful self-hosted internet archiving solution written in Python 3. You feed it URLs of pages you want to archive, and it saves them to disk in a varitety of formats depending on the configuration and the content it detects. ArchiveBox can be installed via [Docker](https://docs.docker.com/get-docker/) (recommended), [`apt`](https://launchpad.net/~archivebox/+archive/ubuntu/archivebox/+packages), [`brew`](https://github.com/ArchiveBox/homebrew-archivebox), or [`pip`](https://www.python.org/downloads/). It works on macOS, Windows, and Linux/BSD (both armv7 and amd64).
ArchiveBox is a powerful self-hosted internet archiving solution written in Python 3. You feed it URLs of pages you want to archive, and it saves them to disk in a varitety of formats depending on the configuration and the content it detects.
Once installed, URLs can be added via the command line `archivebox add` or the built-in Web UI `archivebox server`. It can ingest bookmarks from a service like Pocket/Pinboard, your entire browsing history, RSS feeds, or URLs one at a time.
Your archive can be managed through the command line with commands like `archivebox add`, through the built-in Web UI `archivebox server`, or via the Python library API (beta). It can ingest bookmarks from a browser or service like Pocket/Pinboard, your entire browsing history, RSS feeds, or URLs one at a time. You can also schedule regular/realtime imports with `archivebox schedule`.
The main index is a self-contained `data/index.sqlite3` file, and each snapshot is stored as a folder `data/archive/<timestamp>/`, with an easy-to-read `index.html` and `index.json` within. For each page, ArchiveBox auto-extracts many types of assets/media and saves them in standard formats, with out-of-the-box support for: 3 types of HTML snapshots (wget, Chrome headless, singlefile), a PDF snapshot, a screenshot, a WARC archive, git repositories, images, audio, video, subtitles, article text, and more. The snapshots are browseable and managable offline through the filesystem, the built-in webserver, or the Python API.
The main index is a self-contained `index.sqlite3` file, and each snapshot is stored as a folder `data/archive/<timestamp>/`, with an easy-to-read `index.html` and `index.json` within. For each page, ArchiveBox auto-extracts many types of assets/media and saves them in standard formats, with out-of-the-box support for: several types of HTML snapshots (wget, Chrome headless, singlefile), PDF snapshotting, screenshotting, WARC archiving, git repositories, images, audio, video, subtitles, article text, and more. The snapshots are browseable and managable offline through the filesystem, the built-in webserver, or the Python library API.
#### Quickstart
### Quickstart
It works on Linux/BSD (Intel and ARM CPUs with `docker`/`apt`/`pip3`), macOS (with `docker`/`brew`/`pip3`), and Windows (beta with `docker`/`pip3`).
**First, get ArchiveBox using your system package manager, Docker, or pip:**
```bash
# You can run it with Docker or Docker Compose (recommended)
docker pull archivebox/archivebox
# https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/docker-compose.yml
pip3 install archivebox
archivebox --version
# install extras as-needed, or use one of full setup methods below to get everything out-of-the-box
# or Ubuntu/Debian
mkdir ~/archivebox && cd ~/archivebox # this can be anywhere
archivebox init
archivebox add 'https://example.com'
archivebox add --depth=1 'https://example.com'
archivebox schedule --every=day https://getpocket.com/users/USERNAME/feed/all
archivebox oneshot --extract=title,favicon,media https://www.youtube.com/watch?v=dQw4w9WgXcQ
archivebox help # to see more options
```
*(click to expand the sections below for full setup instructions)*
<details>
<summary><b>Get ArchiveBox with <code>docker-compose</code> on any platform (recommended, everything included out-of-the-box)</b></summary>
First make sure you have Docker installed: https://docs.docker.com/get-docker/
<br/><br/>
This is the recommended way to run ArchiveBox because it includes *all* the extractors like chrome, wget, youtube-dl, git, etc., as well as full-text search with sonic, and many other great features.
```bash
# create a new empty directory and initalize your collection (can be anywhere)
mkdir ~/archivebox && cd ~/archivebox
curl -O https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/docker-compose.yml
docker-compose run archivebox init
docker-compose run archivebox --version
# start the webserver and open the UI (optional)
docker-compose run archivebox manage createsuperuser
docker-compose up -d
open http://127.0.0.1:8000
# you can also add links and manage your archive via the CLI:
docker-compose run archivebox add 'https://example.com'
docker-compose run archivebox status
docker-compose run archivebox help # to see more options
```
</details>
<details>
<summary><b>Get ArchiveBox with <code>docker</code> on any platform</b></summary>
First make sure you have Docker installed: https://docs.docker.com/get-docker/<br/>
```bash
# create a new empty directory and initalize your collection (can be anywhere)
mkdir ~/archivebox && cd ~/archivebox
docker run -v $PWD:/data -it archivebox/archivebox init
docker run -v $PWD:/data -it archivebox/archivebox --version
# start the webserver and open the UI (optional)
docker run -v $PWD:/data -it archivebox/archivebox manage createsuperuser
docker run -v $PWD:/data -p 8000:8000 archivebox/archivebox server 0.0.0.0:8000
open http://127.0.0.1:8000
# you can also add links and manage your archive via the CLI:
docker run -v $PWD:/data -it archivebox/archivebox add 'https://example.com'
docker run -v $PWD:/data -it archivebox/archivebox status
docker run -v $PWD:/data -it archivebox/archivebox help # to see more options
```
</details>
<details>
<summary><b>Get ArchiveBox with <code>apt</code> on Ubuntu >=20.04</b></summary>
```bash
sudo add-apt-repository -u ppa:archivebox/archivebox
apt install archivebox
sudo apt install archivebox
# or macOS
# create a new empty directory and initalize your collection (can be anywhere)
mkdir ~/archivebox && cd ~/archivebox
npm install --prefix . 'git+https://github.com/ArchiveBox/ArchiveBox.git'
archivebox init
archivebox --version
# start the webserver and open the web UI (optional)
archivebox manage createsuperuser
archivebox server 0.0.0.0:8000
open http://127.0.0.1:8000
# you can also add URLs and manage the archive via the CLI and filesystem:
archivebox add 'https://example.com'
archivebox status
archivebox list --html --with-headers > index.html
archivebox list --json --with-headers > index.json
archivebox help # to see more options
```
For other Debian-based systems or older Ubuntu systems you can add these sources to `/etc/apt/sources.list`:
```bash
deb http://ppa.launchpad.net/archivebox/archivebox/ubuntu focal main
deb-src http://ppa.launchpad.net/archivebox/archivebox/ubuntu focal main
```
(you may need to install some other dependencies manually however)
</details>
<details>
<summary><b>Get ArchiveBox with <code>brew</code> on macOS >=10.13</b></summary>
```bash
brew install archivebox/archivebox/archivebox
# or for the Python version only, without wget/git/chrome/etc. included
# create a new empty directory and initalize your collection (can be anywhere)
mkdir ~/archivebox && cd ~/archivebox
npm install --prefix . 'git+https://github.com/ArchiveBox/ArchiveBox.git'
archivebox init
archivebox --version
# start the webserver and open the web UI (optional)
archivebox manage createsuperuser
archivebox server 0.0.0.0:8000
open http://127.0.0.1:8000
# you can also add URLs and manage the archive via the CLI and filesystem:
archivebox add 'https://example.com'
archivebox status
archivebox list --html --with-headers > index.html
archivebox list --json --with-headers > index.json
archivebox help # to see more options
```
</details>
<details>
<summary><b>Get ArchiveBox with <code>pip</code> on any platform</b></summary>
```bash
pip3 install archivebox
# If you're using an apt/brew/pip install you can run archivebox commands normally
# archivebox [subcommand] [...args]
# If you're using Docker you'll have to run the commands like this
# docker run -v $PWD:/data -it archivebox/archivebox [subcommand] [...args]
# And the equivalent in Docker Compose:
# docker-compose run archivebox [subcommand] [...args]
```
<small>Check that everything installed correctly with `archivebox --version`</small>
**To start using archivebox, you have to create a data folder and `cd` into it:**
```bash
mkdir ~/archivebox && cd ~/archivebox # you can put the collection dir anywhere
# create a new empty directory and initalize your collection (can be anywhere)
mkdir ~/archivebox && cd ~/archivebox
npm install --prefix . 'git+https://github.com/ArchiveBox/ArchiveBox.git'
archivebox init
archivebox --version
# Install any missing extras like wget/git/chrome/etc. manually as needed
# start the webserver and open the web UI (optional)
archivebox manage createsuperuser
archivebox server 0.0.0.0:8000
open http://127.0.0.1:8000
# you can also add URLs and manage the archive via the CLI and filesystem:
archivebox add 'https://example.com'
archivebox status
archivebox list --html --with-headers > index.html
archivebox list --json --with-headers > index.json
archivebox help # to see more options
```
**Then Add some URLs to your archive collection:**
```bash
archivebox add https://github.com/ArchiveBox/ArchiveBox
archivebox add --depth=1 https://example.com
```
**View the snapshots of the URLs you added via the self-hosted web UI:**
```bash
archivebox manage createsuperuser # create an admin acct
archivebox server 0.0.0.0:8000 # start the web server
open http://127.0.0.1:8000/ # open the interactive admin panel
ls ~/archivebox/archive/*/index.html # or browse the snapshots on disk
```
</details>
---
<div align="center">
<img src="https://i.imgur.com/lUuicew.png" width="400px">
<br/>
@ -97,9 +210,9 @@ For more information, see the <a href="https://github.com/ArchiveBox/ArchiveBox/
ArchiveBox is a command line tool, self-hostable web-archiving server, and Python library all-in-one. It can be installed on Docker, macOS, and Linux/BSD, and Windows. You can download and install it as a Debian/Ubuntu package, Homebrew package, Python3 package, or a Docker image. No matter which install method you choose, they all provide the same CLI, Web UI, and on-disk data format.
To use ArchiveBox you start by creating a folder for your data to live in (it can be anywhere on your system), and running `archivebox init` inside of it. That will create a sqlite3 index and an `ArchiveBox.conf` file. After that, you can continue to add/export/manage/etc using the CLI `archivebox help`, or you can run the Web UI (recommended).
To use ArchiveBox you start by creating a folder for your data to live in (it can be anywhere on your system), and running `archivebox init` inside of it. That will create a sqlite3 index and an `ArchiveBox.conf` file. After that, you can continue to add/export/manage/etc using the CLI `archivebox help`, or you can run the Web UI (recommended). If you only want to archive a single site, you can run `archivebox oneshot` to avoid having to create a whole collection.
The CLI is considered "stable", the ArchiveBox Python API and REST APIs are in "beta", and the [desktop app](https://github.com/ArchiveBox/desktop) is in "alpha" stage.
The CLI is considered "stable", the ArchiveBox Python API and REST APIs are "beta", and the [desktop app](https://github.com/ArchiveBox/desktop) is "alpha".
At the end of the day, the goal is to sleep soundly knowing that the part of the internet you care about will be automatically preserved in multiple, durable long-term formats that will be accessible for decades (or longer). You can also self-host your archivebox server on a public domain to provide archive.org-style public access to your site snapshots.
@ -146,7 +259,7 @@ archivebox add --depth=1 'https://news.ycombinator.com#2020-12-12'
See the [Usage: CLI](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#CLI-Usage) page for documentation and examples.
It also includes a built-in scheduled import feature and browser bookmarklet, so you can ingest URLs from RSS feeds, websites, or the filesystem regularly.
It also includes a built-in scheduled import feature with `archivebox schedule` and browser bookmarklet, so you can pull in URLs from RSS feeds, websites, or the filesystem regularly/on-demand.
## Output formats
@ -161,11 +274,14 @@ The on-disk layout is optimized to be easy to browse by hand and durable long-te
- **Index:** `index.html` & `index.json` HTML and JSON index files containing metadata and details
- **Title:** `title` title of the site
- **Favicon:** `favicon.ico` favicon of the site
- **Headers:** `headers.json` Any HTTP headers the site returns are saved in a json file
- **SingleFile:** `singlefile.html` HTML snapshot rendered with headless Chrome using SingleFile
- **WGET Clone:** `example.com/page-name.html` wget clone of the site, with .html appended if not present
- **WARC:** `warc/<timestamp>.gz` gzipped WARC of all the resources fetched while archiving
- **PDF:** `output.pdf` Printed PDF of site using headless chrome
- **Screenshot:** `screenshot.png` 1440x900 screenshot of site using headless chrome
- **DOM Dump:** `output.html` DOM Dump of the HTML after rendering using headless chrome
- **Readability:** `article.html/json` Article text extraction using Readability
- **URL to Archive.org:** `archive.org.txt` A link to the saved site on archive.org
- **Audio & Video:** `media/` all audio/video files + playlists, including subtitles & metadata with youtube-dl
- **Source Code:** `git/` clone of any repository found on github, bitbucket, or gitlab links
@ -191,8 +307,8 @@ archivebox add 'https://example.com/any/url/you/want/to/keep/secret/'
# without first disabling share the URL with 3rd party APIs:
archivebox config --set SAVE_ARCHIVE_DOT_ORG=False # disable saving all URLs in Archive.org
archivebox config --set SAVE_FAVICON=False # optional: only the domain is leaked, not full URL
archivebox config --get CHROME_VERSION # optional: set this to chromium instead of chrome if you don't like Google
archivebox config --set SAVE_FAVICON=False # optional: only the domain is leaked, not full URL
archivebox config --set CHROME_BINARY=chromium # optional: switch to chromium to avoid Chrome phoning home to Google
```
Be aware that malicious archived JS can also read the contents of other pages in your archive due to snapshot CSRF and XSS protections being imperfect. See the [Security Overview](https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#stealth-mode) page for more details.
@ -215,95 +331,6 @@ archivebox add 'https://example.com#2020-10-25'
---
# Setup
## Docker Compose
*This is the recommended way of running ArchiveBox.*
It comes with everything working out of the box, including all extractors,
a headless browser runtime, a full webserver, and CLI interface.
```bash
# docker-compose run archivebox <command> [args]
mkdir archivebox && cd archivebox
wget 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/docker-compose.yml'
docker-compose run archivebox init
docker-compose run archivebox add 'https://example.com'
docker-compose run archivebox manage createsuperuser
docker-compose up
open http://127.0.0.1:8000
```
## Docker
```bash
# docker run -v $PWD:/data -it archivebox/archivebox <command> [args]
mkdir archivebox && cd archivebox
docker run -v $PWD:/data -it archivebox/archivebox init
docker run -v $PWD:/data -it archivebox/archivebox add 'https://example.com'
docker run -v $PWD:/data -it archivebox/archivebox manage createsuperuser
# run the webserver to access the web UI
docker run -v $PWD:/data -it -p 8000:8000 archivebox/archivebox server 0.0.0.0:8000
open http://127.0.0.1:8000
# or export a static version of the index if you dont want to run a server
docker run -v $PWD:/data -it archivebox/archivebox list --html --with-headers > index.html
docker run -v $PWD:/data -it archivebox/archivebox list --json --with-headers > index.json
open ./index.html
```
## Bare Metal
```bash
# archivebox <command> [args]
# on Debian/Ubuntu
sudo add-apt-repository -u ppa:archivebox/archivebox
apt install archivebox
# on macOS
brew install archivebox/archivebox/archivebox
```
Initialize your archive in a directory somewhere and add some links:
```bash
mkdir ~/archivebox && cd archivebox
npm install --prefix . 'git+https://github.com/ArchiveBox/ArchiveBox.git'
archivebox init
archivebox add 'https://example.com' # add URLs as args pipe them in via stdin
archivebox add --depth=1 https://example.com/table-of-contents.html
# it can injest links from many formats, including RSS/JSON/XML/MD/TXT and more
curl https://getpocket.com/users/USERNAME/feed/all | archivebox add
```
Start the webserver to access the web UI:
```bash
archivebox manage createsuperuser
archivebox server 0.0.0.0:8000
open http://127.0.0.1:8000
```
Or export a static HTML version of the index if you don't want to run a webserver:
```bash
archivebox list --html --with-headers > index.html
archivebox list --json --with-headers > index.json
open ./index.html
```
To view more information about your dependencies, data, or the CLI:
```bash
archivebox version
archivebox status
archivebox help
```
---
<div align="center">
<img src="https://i.imgur.com/PVO88AZ.png" width="80%"/>
</div>
@ -418,20 +445,19 @@ All contributions to ArchiveBox are welcomed! Check our [issues](https://github.
First, install the system dependencies from the "Bare Metal" section above.
Then you can clone the ArchiveBox repo and install
```python3
git clone https://github.com/ArchiveBox/ArchiveBox
cd ArchiveBox
git clone https://github.com/ArchiveBox/ArchiveBox && cd ArchiveBox
git checkout master # or the branch you want to test
git pull
git submodule update --init --recursive
git pull --recurse-submodules
# Install ArchiveBox + python dependencies
python3 -m venv .venv && source .venv/bin/activate && pip install -e .[dev]
# or
pipenv install --dev && pipenv shell
# or with pipenv: pipenv install --dev && pipenv shell
# Install node dependencies
npm install
# Optional: install the extractor dependencies
# Optional: install extractor dependencies manually or with helper script
./bin/setup.sh
# Optional: develop via docker by mounting the code dir into the container
@ -463,6 +489,17 @@ You can also run all these in Docker. For more examples see the Github Actions C
```
(uses `pytest -s`)
#### Make migrations or enter a django shell
```bash
cd archivebox/
./manage.py makemigrations
cd data/
archivebox shell
```
(uses `pytest -s`)
#### Build the docs, pip package, and docker image
```bash
@ -471,6 +508,8 @@ You can also run all these in Docker. For more examples see the Github Actions C
# or individually:
./bin/build_docs.sh
./bin/build_pip.sh
./bin/build_deb.sh
./bin/build_brew.sh
./bin/build_docker.sh
```

View file

@ -1,541 +0,0 @@
Metadata-Version: 2.1
Name: archivebox
Version: 0.4.24
Summary: The self-hosted internet archive.
Home-page: https://github.com/ArchiveBox/ArchiveBox
Author: Nick Sweeting
Author-email: git@nicksweeting.com
License: MIT
Project-URL: Source, https://github.com/ArchiveBox/ArchiveBox
Project-URL: Documentation, https://github.com/ArchiveBox/ArchiveBox/wiki
Project-URL: Bug Tracker, https://github.com/ArchiveBox/ArchiveBox/issues
Project-URL: Changelog, https://github.com/ArchiveBox/ArchiveBox/wiki/Changelog
Project-URL: Roadmap, https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap
Project-URL: Community, https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community
Project-URL: Donate, https://github.com/ArchiveBox/ArchiveBox/wiki/Donations
Description: <div align="center">
<em><img src="https://i.imgur.com/5B48E3N.png" height="90px"></em>
<h1>ArchiveBox<br/><sub>The open-source self-hosted web archive.</sub></h1>
▶️ <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart">Quickstart</a> |
<a href="https://archivebox.zervice.io/">Demo</a> |
<a href="https://github.com/ArchiveBox/ArchiveBox">Github</a> |
<a href="https://github.com/ArchiveBox/ArchiveBox/wiki">Documentation</a> |
<a href="#background--motivation">Info & Motivation</a> |
<a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community">Community</a> |
<a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap">Roadmap</a>
<pre>
"Your own personal internet archive" (网站存档 / 爬虫)
</pre>
<!--<a href="http://webchat.freenode.net?channels=ArchiveBox&uio=d4"><img src="https://img.shields.io/badge/Community_chat-IRC-%2328A745.svg"/></a>-->
<a href="https://github.com/ArchiveBox/ArchiveBox/blob/master/LICENSE"><img src="https://img.shields.io/badge/Open_source-MIT-green.svg?logo=git&logoColor=green"/></a>
<a href="https://github.com/ArchiveBox/ArchiveBox/commits/dev"><img src="https://img.shields.io/github/last-commit/ArchiveBox/ArchiveBox.svg?logo=Sublime+Text&logoColor=green&label=Active"/></a>
<a href="https://github.com/ArchiveBox/ArchiveBox"><img src="https://img.shields.io/github/stars/ArchiveBox/ArchiveBox.svg?logo=github&label=Stars&logoColor=blue"/></a>
<a href="https://test.pypi.org/project/archivebox/"><img src="https://img.shields.io/badge/Python-%3E%3D3.7-yellow.svg?logo=python&logoColor=yellow"/></a>
<a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Install#dependencies"><img src="https://img.shields.io/badge/Chromium-%3E%3D59-orange.svg?logo=Google+Chrome&logoColor=orange"/></a>
<a href="https://hub.docker.com/r/archivebox/archivebox"><img src="https://img.shields.io/badge/Docker-all%20platforms-lightblue.svg?logo=docker&logoColor=lightblue"/></a>
<hr/>
</div>
ArchiveBox is a powerful self-hosted internet archiving solution written in Python 3. You feed it URLs of pages you want to archive, and it saves them to disk in a varitety of formats depending on the configuration and the content it detects. ArchiveBox can be installed via [Docker](https://docs.docker.com/get-docker/) (recommended) or [`pip`](https://www.python.org/downloads/). It works on macOS, Windows, and Linux/BSD (both armv7 and amd64).
Once installed, URLs can be added via the command line `archivebox add` or the built-in Web UI `archivebox server`. It can ingest bookmarks from a service like Pocket/Pinboard, your entire browsing history, RSS feeds, or URLs one at a time.
The main index is a self-contained `data/index.sqlite3` file, and each snapshot is stored as a folder `data/archive/<timestamp>/`, with an easy-to-read `index.html` and `index.json` within. For each page, ArchiveBox auto-extracts many types of assets/media and saves them in standard formats, with out-of-the-box support for: 3 types of HTML snapshots (wget, Chrome headless, singlefile), a PDF snapshot, a screenshot, a WARC archive, git repositories, images, audio, video, subtitles, article text, and more. The snapshots are browseable and managable offline through the filesystem, the built-in webserver, or the Python API.
#### Quickstart
```bash
# 1. Create a folder somewhere to hold your ArchiveBox data
mkdir ~/archivebox && cd ~/archivebox
docker run -v $PWD:/data -it archivebox/archivebox init
# 2. Archive some URLs to get started
docker run -v $PWD:/data -t archivebox/archivebox add https://github.com/ArchiveBox/ArchiveBox
docker run -v $PWD:/data -t archivebox/archivebox add --depth=1 https://example.com
# 3. Then view the snapshots of the URLs you added via the self-hosted web UI
docker run -v $PWD:/data -it archivebox/archivebox manage createsuperuser # create an admin acct
docker run -v $PWD:/data -p 8000:8000 archivebox/archivebox # start the web server
open http://127.0.0.1:8000/ # open the interactive admin panel
ls archive/*/index.html # or just browse snapshots on disk
```
<div align="center">
<img src="https://i.imgur.com/lUuicew.png" width="400px">
<br/>
<a href="https://archivebox.zervice.io">DEMO: archivebox.zervice.io/</a>
For more information, see the <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart">full Quickstart guide</a>, <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Usage">Usage</a>, and <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration">Configuration</a> docs.
</div>
---
# Overview
ArchiveBox is a command line tool, self-hostable web-archiving server, and Python library all-in-one. It's available as a Python3 package or a Docker image, both methods provide the same CLI, Web UI, and on-disk data format.
It works on Docker, macOS, and Linux/BSD. Windows is not officially supported, but users have reported getting it working using the WSL2 + Docker.
To use ArchiveBox you start by creating a folder for your data to live in (it can be anywhere on your system), and running `archivebox init` inside of it. That will create a sqlite3 index and an `ArchiveBox.conf` file. After that, you can continue to add/remove/search/import/export/manage/config/etc using the CLI `archivebox help`, or you can run the Web UI (recommended):
```bash
archivebox manage createsuperuser
archivebox server 0.0.0.0:8000
open http://127.0.0.1:8000
```
The CLI is considered "stable", the ArchiveBox Python API and REST APIs are in "beta", and the [desktop app](https://github.com/ArchiveBox/desktop) is in "alpha" stage.
At the end of the day, the goal is to sleep soundly knowing that the part of the internet you care about will be automatically preserved in multiple, durable long-term formats that will be accessible for decades (or longer). You can also self-host your archivebox server on a public domain to provide archive.org-style public access to your site snapshots.
<div align="center">
<img src="https://i.imgur.com/3tBL7PU.png" width="22%" alt="CLI Screenshot" align="top">
<img src="https://i.imgur.com/viklZNG.png" width="22%" alt="Desktop index screenshot" align="top">
<img src="https://i.imgur.com/RefWsXB.jpg" width="22%" alt="Desktop details page Screenshot"/>
<img src="https://i.imgur.com/M6HhzVx.png" width="22%" alt="Desktop details page Screenshot"/><br/>
<sup><a href="https://archive.sweeting.me/">Demo</a> | <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Usage">Usage</a> | <a href="#screenshots">Screenshots</a></sup>
<br/>
<sub>. . . . . . . . . . . . . . . . . . . . . . . . . . . .</sub>
</div><br/>
## Key Features
- [**Free & open source**](https://github.com/ArchiveBox/ArchiveBox/blob/master/LICENSE), doesn't require signing up for anything, stores all data locally
- [**Few dependencies**](https://github.com/ArchiveBox/ArchiveBox/wiki/Install#dependencies) and [simple command line interface](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#CLI-Usage)
- [**Comprehensive documentation**](https://github.com/ArchiveBox/ArchiveBox/wiki), [active development](https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap), and [rich community](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community)
- Easy to set up **[scheduled importing](https://github.com/ArchiveBox/ArchiveBox/wiki/Scheduled-Archiving) from multiple sources**
- Uses common, **durable, [long-term formats](#saves-lots-of-useful-stuff-for-each-imported-link)** like HTML, JSON, PDF, PNG, and WARC
- ~~**Suitable for paywalled / [authenticated content](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#chrome_user_data_dir)** (can use your cookies)~~ (do not do this until v0.5 is released with some security fixes)
- **Doesn't require a constantly-running daemon**, proxy, or native app
- Provides a CLI, Python API, self-hosted web UI, and REST API (WIP)
- Architected to be able to run [**many varieties of scripts during archiving**](https://github.com/ArchiveBox/ArchiveBox/issues/51), e.g. to extract media, summarize articles, [scroll pages](https://github.com/ArchiveBox/ArchiveBox/issues/80), [close modals](https://github.com/ArchiveBox/ArchiveBox/issues/175), expand comment threads, etc.
- Can also [**mirror content to 3rd-party archiving services**](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#submit_archive_dot_org) automatically for redundancy
## Input formats
ArchiveBox supports many input formats for URLs, including Pocket & Pinboard exports, Browser bookmarks, Browser history, plain text, HTML, markdown, and more!
```bash
echo 'http://example.com' | archivebox add
archivebox add 'https://example.com/some/page'
archivebox add < ~/Downloads/firefox_bookmarks_export.html
archivebox add < any_text_with_urls_in_it.txt
archivebox add --depth=1 'https://example.com/some/downloads.html'
archivebox add --depth=1 'https://news.ycombinator.com#2020-12-12'
```
- <img src="https://nicksweeting.com/images/bookmarks.png" height="22px"/> Browser history or bookmarks exports (Chrome, Firefox, Safari, IE, Opera, and more)
- <img src="https://nicksweeting.com/images/rss.svg" height="22px"/> RSS, XML, JSON, CSV, SQL, HTML, Markdown, TXT, or any other text-based format
- <img src="https://getpocket.com/favicon.ico" height="22px"/> Pocket, Pinboard, Instapaper, Shaarli, Delicious, Reddit Saved Posts, Wallabag, Unmark.it, OneTab, and more
See the [Usage: CLI](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#CLI-Usage) page for documentation and examples.
It also includes a built-in scheduled import feature and browser bookmarklet, so you can ingest URLs from RSS feeds, websites, or the filesystem regularly.
## Output formats
All of ArchiveBox's state (including the index, snapshot data, and config file) is stored in a single folder called the "ArchiveBox data folder". All `archivebox` CLI commands must be run from inside this folder, and you first create it by running `archivebox init`.
The on-disk layout is optimized to be easy to browse by hand and durable long-term. The main index is a standard sqlite3 database (it can also be exported as static JSON/HTML), and the archive snapshots are organized by date-added timestamp in the `archive/` subfolder. Each snapshot subfolder includes a static JSON and HTML index describing its contents, and the snapshot extrator outputs are plain files within the folder (e.g. `media/example.mp4`, `git/somerepo.git`, `static/someimage.png`, etc.)
```bash
ls ./archive/<timestamp>/
```
- **Index:** `index.html` & `index.json` HTML and JSON index files containing metadata and details
- **Title:** `title` title of the site
- **Favicon:** `favicon.ico` favicon of the site
- **WGET Clone:** `example.com/page-name.html` wget clone of the site, with .html appended if not present
- **WARC:** `warc/<timestamp>.gz` gzipped WARC of all the resources fetched while archiving
- **PDF:** `output.pdf` Printed PDF of site using headless chrome
- **Screenshot:** `screenshot.png` 1440x900 screenshot of site using headless chrome
- **DOM Dump:** `output.html` DOM Dump of the HTML after rendering using headless chrome
- **URL to Archive.org:** `archive.org.txt` A link to the saved site on archive.org
- **Audio & Video:** `media/` all audio/video files + playlists, including subtitles & metadata with youtube-dl
- **Source Code:** `git/` clone of any repository found on github, bitbucket, or gitlab links
- _More coming soon! See the [Roadmap](https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap)..._
It does everything out-of-the-box by default, but you can disable or tweak [individual archive methods](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration) via environment variables or config file.
## Dependencies
You don't need to install all the dependencies, ArchiveBox will automatically enable the relevant modules based on whatever you have available, but it's recommended to use the official [Docker image](https://github.com/ArchiveBox/ArchiveBox/wiki/Docker) with everything preinstalled.
If you so choose, you can also install ArchiveBox and its dependencies directly on any Linux or macOS systems using the [automated setup script](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart) or the [system package manager](https://github.com/ArchiveBox/ArchiveBox/wiki/Install).
ArchiveBox is written in Python 3 so it requires `python3` and `pip3` available on your system. It also uses a set of optional, but highly recommended external dependencies for archiving sites: `wget` (for plain HTML, static files, and WARC saving), `chromium` (for screenshots, PDFs, JS execution, and more), `youtube-dl` (for audio and video), `git` (for cloning git repos), and `nodejs` (for readability and singlefile), and more.
## Caveats
If you're importing URLs containing secret slugs or pages with private content (e.g Google Docs, CodiMD notepads, etc), you may want to disable some of the extractor modules to avoid leaking private URLs to 3rd party APIs during the archiving process.
```bash
# don't do this:
archivebox add 'https://docs.google.com/document/d/12345somelongsecrethere'
archivebox add 'https://example.com/any/url/you/want/to/keep/secret/'
# without first disabling share the URL with 3rd party APIs:
archivebox config --set SAVE_ARCHIVE_DOT_ORG=False # disable saving all URLs in Archive.org
archivebox config --set SAVE_FAVICON=False # optional: only the domain is leaked, not full URL
archivebox config --get CHROME_VERSION # optional: set this to chromium instead of chrome if you don't like Google
```
Be aware that malicious archived JS can also read the contents of other pages in your archive due to snapshot CSRF and XSS protections being imperfect. See the [Security Overview](https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#stealth-mode) page for more details.
```bash
# visiting an archived page with malicious JS:
https://127.0.0.1:8000/archive/1602401954/example.com/index.html
# example.com/index.js can now make a request to read everything:
https://127.0.0.1:8000/index.html
https://127.0.0.1:8000/archive/*
# then example.com/index.js can send it off to some evil server
```
Support for saving multiple snapshots of each site over time will be [added soon](https://github.com/ArchiveBox/ArchiveBox/issues/179) (along with the ability to view diffs of the changes between runs). For now ArchiveBox is designed to only archive each URL with each extractor type once. A workaround to take multiple snapshots of the same URL is to make them slightly different by adding a hash:
```bash
archivebox add 'https://example.com#2020-10-24'
...
archivebox add 'https://example.com#2020-10-25'
```
---
# Setup
## Docker Compose
*This is the recommended way of running ArchiveBox.*
It comes with everything working out of the box, including all extractors,
a headless browser runtime, a full webserver, and CLI interface.
```bash
# docker-compose run archivebox <command> [args]
mkdir archivebox && cd archivebox
wget 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/docker-compose.yml'
docker-compose run archivebox init
docker-compose run archivebox add 'https://example.com'
docker-compose run archivebox manage createsuperuser
docker-compose up
open http://127.0.0.1:8000
```
## Docker
```bash
# docker run -v $PWD:/data -it archivebox/archivebox <command> [args]
mkdir archivebox && cd archivebox
docker run -v $PWD:/data -it archivebox/archivebox init
docker run -v $PWD:/data -it archivebox/archivebox add 'https://example.com'
docker run -v $PWD:/data -it archivebox/archivebox manage createsuperuser
# run the webserver to access the web UI
docker run -v $PWD:/data -it -p 8000:8000 archivebox/archivebox server 0.0.0.0:8000
open http://127.0.0.1:8000
# or export a static version of the index if you dont want to run a server
docker run -v $PWD:/data -it archivebox/archivebox list --html --with-headers > index.html
docker run -v $PWD:/data -it archivebox/archivebox list --json --with-headers > index.json
open ./index.html
```
## Bare Metal
```bash
# archivebox <command> [args]
```
First install the system, pip, and npm dependencies:
```bash
# Install main dependendencies using apt on Ubuntu/Debian, brew on mac, or pkg on BSD
apt install python3 python3-pip python3-dev git curl wget chromium-browser youtube-dl
# Install Node runtime (used for headless browser scripts like Readability, Singlefile, Mercury, etc.)
curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add - \
&& echo 'deb https://deb.nodesource.com/node_14.x $(lsb_release -cs) main' >> /etc/apt/sources.list \
&& apt-get update \
&& apt-get install --no-install-recommends nodejs
# Make a directory to hold your collection
mkdir archivebox && cd archivebox # (can be anywhere, doesn't have to be called archivebox)
# Install the archivebox python package in ./.venv
python3 -m venv .venv && source .venv/bin/activate
pip install --upgrade archivebox
# Install node packages in ./node_modules (used for SingleFile, Readability, and Puppeteer)
npm install --prefix . 'git+https://github.com/ArchiveBox/ArchiveBox.git'
```
Initialize your archive and add some links:
```bash
archivebox init
archivebox add 'https://example.com' # add URLs as args pipe them in via stdin
archivebox add --depth=1 https://example.com/table-of-contents.html
# it can injest links from many formats, including RSS/JSON/XML/MD/TXT and more
curl https://getpocket.com/users/USERNAME/feed/all | archivebox add
```
Start the webserver to access the web UI:
```bash
archivebox manage createsuperuser
archivebox server 0.0.0.0:8000
open http://127.0.0.1:8000
```
Or export a static HTML version of the index if you don't want to run a webserver:
```bash
archivebox list --html --with-headers > index.html
archivebox list --json --with-headers > index.json
open ./index.html
```
To view more information about your dependencies, data, or the CLI:
```bash
archivebox version
archivebox status
archivebox help
```
---
<div align="center">
<img src="https://i.imgur.com/PVO88AZ.png" width="80%"/>
</div>
---
# Background & Motivation
Vast treasure troves of knowledge are lost every day on the internet to link rot. As a society, we have an imperative to preserve some important parts of that treasure, just like we preserve our books, paintings, and music in physical libraries long after the originals go out of print or fade into obscurity.
Whether it's to resist censorship by saving articles before they get taken down or edited, or
just to save a collection of early 2010's flash games you love to play, having the tools to
archive internet content enables to you save the stuff you care most about before it disappears.
<div align="center">
<img src="https://i.imgur.com/bC6eZcV.png" width="50%"/><br/>
<sup><i>Image from <a href="https://digiday.com/media/wtf-link-rot/">WTF is Link Rot?</a>...</i><br/></sup>
</div>
The balance between the permanence and ephemeral nature of content on the internet is part of what makes it beautiful.
I don't think everything should be preserved in an automated fashion, making all content permanent and never removable, but I do think people should be able to decide for themselves and effectively archive specific content that they care about.
Because modern websites are complicated and often rely on dynamic content,
ArchiveBox archives the sites in **several different formats** beyond what public archiving services like Archive.org and Archive.is are capable of saving. Using multiple methods and the market-dominant browser to execute JS ensures we can save even the most complex, finicky websites in at least a few high-quality, long-term data formats.
All the archived links are stored by date bookmarked in `./archive/<timestamp>`, and everything is indexed nicely with JSON & HTML files. The intent is for all the content to be viewable with common software in 50 - 100 years without needing to run ArchiveBox in a VM.
## Comparison to Other Projects
▶ **Check out our [community page](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community) for an index of web archiving initiatives and projects.**
<img src="https://i.imgur.com/4nkFjdv.png" width="10%" align="left" alt="comparison"/> The aim of ArchiveBox is to go beyond what the Wayback Machine and other public archiving services can do, by adding a headless browser to replay sessions accurately, and by automatically extracting all the content in multiple redundant formats that will survive being passed down to historians and archivists through many generations.
#### User Interface & Intended Purpose
ArchiveBox differentiates itself from [similar projects](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community#Web-Archiving-Projects) by being a simple, one-shot CLI interface for users to ingest bulk feeds of URLs over extended periods, as opposed to being a backend service that ingests individual, manually-submitted URLs from a web UI. However, we also have the option to add urls via a web interface through our Django frontend.
#### Private Local Archives vs Centralized Public Archives
Unlike crawler software that starts from a seed URL and works outwards, or public tools like Archive.org designed for users to manually submit links from the public internet, ArchiveBox tries to be a set-and-forget archiver suitable for archiving your entire browsing history, RSS feeds, or bookmarks, ~~including private/authenticated content that you wouldn't otherwise share with a centralized service~~ (do not do this until v0.5 is released with some security fixes). Also by having each user store their own content locally, we can save much larger portions of everyone's browsing history than a shared centralized service would be able to handle.
#### Storage Requirements
Because ArchiveBox is designed to ingest a firehose of browser history and bookmark feeds to a local disk, it can be much more disk-space intensive than a centralized service like the Internet Archive or Archive.today. However, as storage space gets cheaper and compression improves, you should be able to use it continuously over the years without having to delete anything. In my experience, ArchiveBox uses about 5gb per 1000 articles, but your milage may vary depending on which options you have enabled and what types of sites you're archiving. By default, it archives everything in as many formats as possible, meaning it takes more space than a using a single method, but more content is accurately replayable over extended periods of time. Storage requirements can be reduced by using a compressed/deduplicated filesystem like ZFS/BTRFS, or by setting `SAVE_MEDIA=False` to skip audio & video files.
## Learn more
Whether you want to learn which organizations are the big players in the web archiving space, want to find a specific open-source tool for your web archiving need, or just want to see where archivists hang out online, our Community Wiki page serves as an index of the broader web archiving community. Check it out to learn about some of the coolest web archiving projects and communities on the web!
<img src="https://i.imgur.com/0ZOmOvN.png" width="14%" align="right"/>
- [Community Wiki](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community)
- [The Master Lists](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community#The-Master-Lists)
_Community-maintained indexes of archiving tools and institutions._
- [Web Archiving Software](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community#Web-Archiving-Projects)
_Open source tools and projects in the internet archiving space._
- [Reading List](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community#Reading-List)
_Articles, posts, and blogs relevant to ArchiveBox and web archiving in general._
- [Communities](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community#Communities)
_A collection of the most active internet archiving communities and initiatives._
- Check out the ArchiveBox [Roadmap](https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap) and [Changelog](https://github.com/ArchiveBox/ArchiveBox/wiki/Changelog)
- Learn why archiving the internet is important by reading the "[On the Importance of Web Archiving](https://parameters.ssrc.org/2018/09/on-the-importance-of-web-archiving/)" blog post.
- Or reach out to me for questions and comments via [@ArchiveBoxApp](https://twitter.com/ArchiveBoxApp) or [@theSquashSH](https://twitter.com/thesquashSH) on Twitter.
---
# Documentation
<img src="https://read-the-docs-guidelines.readthedocs-hosted.com/_images/logo-dark.png" width="13%" align="right"/>
We use the [Github wiki system](https://github.com/ArchiveBox/ArchiveBox/wiki) and [Read the Docs](https://archivebox.readthedocs.io/en/latest/) (WIP) for documentation.
You can also access the docs locally by looking in the [`ArchiveBox/docs/`](https://github.com/ArchiveBox/ArchiveBox/wiki/Home) folder.
## Getting Started
- [Quickstart](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart)
- [Install](https://github.com/ArchiveBox/ArchiveBox/wiki/Install)
- [Docker](https://github.com/ArchiveBox/ArchiveBox/wiki/Docker)
## Reference
- [Usage](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage)
- [Configuration](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration)
- [Supported Sources](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart#2-get-your-list-of-urls-to-archive)
- [Supported Outputs](https://github.com/ArchiveBox/ArchiveBox/wiki#can-save-these-things-for-each-site)
- [Scheduled Archiving](https://github.com/ArchiveBox/ArchiveBox/wiki/Scheduled-Archiving)
- [Publishing Your Archive](https://github.com/ArchiveBox/ArchiveBox/wiki/Publishing-Your-Archive)
- [Chromium Install](https://github.com/ArchiveBox/ArchiveBox/wiki/Install-Chromium)
- [Security Overview](https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview)
- [Troubleshooting](https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting)
- [Python API](https://docs.archivebox.io/en/latest/modules.html)
- REST API (coming soon...)
## More Info
- [Tickets](https://github.com/ArchiveBox/ArchiveBox/issues)
- [Roadmap](https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap)
- [Changelog](https://github.com/ArchiveBox/ArchiveBox/wiki/Changelog)
- [Donations](https://github.com/ArchiveBox/ArchiveBox/wiki/Donations)
- [Background & Motivation](https://github.com/ArchiveBox/ArchiveBox#background--motivation)
- [Web Archiving Community](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community)
---
# ArchiveBox Development
All contributions to ArchiveBox are welcomed! Check our [issues](https://github.com/ArchiveBox/ArchiveBox/issues) and [Roadmap](https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap) for things to work on, and please open an issue to discuss your proposed implementation before working on things! Otherwise we may have to close your PR if it doesn't align with our roadmap.
### Setup the dev environment
First, install the system dependencies from the "Bare Metal" section above.
Then you can clone the ArchiveBox repo and install
```python3
git clone https://github.com/ArchiveBox/ArchiveBox
cd ArchiveBox
git checkout master # or the branch you want to test
git pull
# Install ArchiveBox + python dependencies
python3 -m venv .venv && source .venv/bin/activate && pip install -e .[dev]
# or
pipenv install --dev && pipenv shell
# Install node dependencies
npm install
# Optional: install the extractor dependencies
./bin/setup.sh
# Optional: develop via docker by mounting the code dir into the container
# if you edit e.g. ./archivebox/core/models.py on the docker host, runserver
# inside the container will reload and pick up your changes
docker build . -t archivebox
docker run -it -p 8000:8000 \
-v $PWD/data:/data \
-v $PWD/archivebox:/app/archivebox \
archivebox server 0.0.0.0:8000 --debug --reload
```
### Common development tasks
See the `./bin/` folder and read the source of the bash scripts within.
You can also run all these in Docker. For more examples see the Github Actions CI/CD tests that are run: `.github/workflows/*.yaml`.
#### Run the linters
```bash
./bin/lint.sh
```
(uses `flake8` and `mypy`)
#### Run the integration tests
```bash
./bin/test.sh
```
(uses `pytest -s`)
#### Build the docs, pip package, and docker image
```bash
./bin/build.sh
# or individually:
./bin/build_docs.sh
./bin/build_pip.sh
./bin/build_docker.sh
```
#### Roll a release
```bash
./bin/release.sh
```
(bumps the version, builds, and pushes a release to PyPI, Docker Hub, and Github Packages)
---
<div align="center">
<br/><br/>
<img src="https://raw.githubusercontent.com/Monadical-SAS/redux-time/HEAD/examples/static/jeremy.jpg" height="40px"/>
<br/>
<sub><i>This project is maintained mostly in <a href="https://nicksweeting.com/blog#About">my spare time</a> with the help from generous contributors and Monadical.com.</i></sub>
<br/><br/>
<br/>
<a href="https://github.com/sponsors/pirate">Sponsor us on Github</a>
<br>
<br>
<a href="https://www.patreon.com/theSquashSH"><img src="https://img.shields.io/badge/Donate_to_support_development-via_Patreon-%23DD5D76.svg?style=flat"/></a>
<br/>
<a href="https://twitter.com/ArchiveBoxApp"><img src="https://img.shields.io/badge/Tweet-%40ArchiveBoxApp-blue.svg?style=flat"/></a>
<a href="https://github.com/ArchiveBox/ArchiveBox"><img src="https://img.shields.io/github/stars/ArchiveBox/ArchiveBox.svg?style=flat&label=Star+on+Github"/></a>
<br/><br/>
</div>
Platform: UNKNOWN
Classifier: License :: OSI Approved :: MIT License
Classifier: Natural Language :: English
Classifier: Operating System :: OS Independent
Classifier: Development Status :: 4 - Beta
Classifier: Topic :: Utilities
Classifier: Topic :: System :: Archiving
Classifier: Topic :: System :: Archiving :: Backup
Classifier: Topic :: System :: Recovery Tools
Classifier: Topic :: Sociology :: History
Classifier: Topic :: Internet :: WWW/HTTP
Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
Classifier: Topic :: Internet :: WWW/HTTP :: WSGI :: Application
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Education
Classifier: Intended Audience :: End Users/Desktop
Classifier: Intended Audience :: Information Technology
Classifier: Intended Audience :: Legal Industry
Classifier: Intended Audience :: System Administrators
Classifier: Environment :: Console
Classifier: Environment :: Web Environment
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Framework :: Django
Classifier: Typing :: Typed
Requires-Python: >=3.7
Description-Content-Type: text/markdown
Provides-Extra: dev

View file

@ -1,128 +0,0 @@
MANIFEST.in
README.md
setup.py
archivebox/.flake8
archivebox/LICENSE
archivebox/README.md
archivebox/__init__.py
archivebox/__main__.py
archivebox/base32_crockford.py
archivebox/config.py
archivebox/config_stubs.py
archivebox/logging_util.py
archivebox/main.py
archivebox/manage.py
archivebox/mypy.ini
archivebox/package.json
archivebox/system.py
archivebox/util.py
archivebox.egg-info/PKG-INFO
archivebox.egg-info/SOURCES.txt
archivebox.egg-info/dependency_links.txt
archivebox.egg-info/entry_points.txt
archivebox.egg-info/requires.txt
archivebox.egg-info/top_level.txt
archivebox/cli/__init__.py
archivebox/cli/archivebox_add.py
archivebox/cli/archivebox_config.py
archivebox/cli/archivebox_help.py
archivebox/cli/archivebox_init.py
archivebox/cli/archivebox_list.py
archivebox/cli/archivebox_manage.py
archivebox/cli/archivebox_oneshot.py
archivebox/cli/archivebox_remove.py
archivebox/cli/archivebox_schedule.py
archivebox/cli/archivebox_server.py
archivebox/cli/archivebox_shell.py
archivebox/cli/archivebox_status.py
archivebox/cli/archivebox_update.py
archivebox/cli/archivebox_version.py
archivebox/cli/tests.py
archivebox/core/__init__.py
archivebox/core/admin.py
archivebox/core/apps.py
archivebox/core/forms.py
archivebox/core/models.py
archivebox/core/settings.py
archivebox/core/tests.py
archivebox/core/urls.py
archivebox/core/utils.py
archivebox/core/utils_taggit.py
archivebox/core/views.py
archivebox/core/welcome_message.py
archivebox/core/wsgi.py
archivebox/core/management/commands/archivebox.py
archivebox/core/migrations/0001_initial.py
archivebox/core/migrations/0002_auto_20200625_1521.py
archivebox/core/migrations/0003_auto_20200630_1034.py
archivebox/core/migrations/0004_auto_20200713_1552.py
archivebox/core/migrations/0005_auto_20200728_0326.py
archivebox/core/migrations/0006_auto_20201012_1520.py
archivebox/core/migrations/__init__.py
archivebox/extractors/__init__.py
archivebox/extractors/archive_org.py
archivebox/extractors/dom.py
archivebox/extractors/favicon.py
archivebox/extractors/git.py
archivebox/extractors/headers.py
archivebox/extractors/media.py
archivebox/extractors/mercury.py
archivebox/extractors/pdf.py
archivebox/extractors/readability.py
archivebox/extractors/screenshot.py
archivebox/extractors/singlefile.py
archivebox/extractors/title.py
archivebox/extractors/wget.py
archivebox/index/__init__.py
archivebox/index/csv.py
archivebox/index/html.py
archivebox/index/json.py
archivebox/index/schema.py
archivebox/index/sql.py
archivebox/parsers/__init__.py
archivebox/parsers/generic_html.py
archivebox/parsers/generic_json.py
archivebox/parsers/generic_rss.py
archivebox/parsers/generic_txt.py
archivebox/parsers/medium_rss.py
archivebox/parsers/netscape_html.py
archivebox/parsers/pinboard_rss.py
archivebox/parsers/pocket_html.py
archivebox/parsers/shaarli_rss.py
archivebox/parsers/wallabag_atom.py
archivebox/themes/admin/actions_as_select.html
archivebox/themes/admin/app_index.html
archivebox/themes/admin/base.html
archivebox/themes/admin/login.html
archivebox/themes/default/add_links.html
archivebox/themes/default/base.html
archivebox/themes/default/main_index.html
archivebox/themes/default/core/snapshot_list.html
archivebox/themes/default/static/add.css
archivebox/themes/default/static/admin.css
archivebox/themes/default/static/archive.png
archivebox/themes/default/static/bootstrap.min.css
archivebox/themes/default/static/external.png
archivebox/themes/default/static/jquery.dataTables.min.css
archivebox/themes/default/static/jquery.dataTables.min.js
archivebox/themes/default/static/jquery.min.js
archivebox/themes/default/static/sort_asc.png
archivebox/themes/default/static/sort_both.png
archivebox/themes/default/static/sort_desc.png
archivebox/themes/default/static/spinner.gif
archivebox/themes/legacy/favicon.ico
archivebox/themes/legacy/link_details.html
archivebox/themes/legacy/main_index.html
archivebox/themes/legacy/main_index_minimal.html
archivebox/themes/legacy/main_index_row.html
archivebox/themes/legacy/robots.txt
archivebox/themes/legacy/static/archive.png
archivebox/themes/legacy/static/bootstrap.min.css
archivebox/themes/legacy/static/external.png
archivebox/themes/legacy/static/jquery.dataTables.min.css
archivebox/themes/legacy/static/jquery.dataTables.min.js
archivebox/themes/legacy/static/jquery.min.js
archivebox/themes/legacy/static/sort_asc.png
archivebox/themes/legacy/static/sort_both.png
archivebox/themes/legacy/static/sort_desc.png
archivebox/themes/legacy/static/spinner.gif

View file

@ -1 +0,0 @@

View file

@ -1,3 +0,0 @@
[console_scripts]
archivebox = archivebox.cli:main

View file

@ -1,25 +0,0 @@
requests==2.24.0
atomicwrites==1.4.0
mypy-extensions==0.4.3
django==3.0.8
django-extensions==3.0.3
dateparser
ipython
youtube-dl
python-crontab==2.5.1
croniter==0.3.34
w3lib==1.22.0
[dev]
setuptools
twine
flake8
ipdb
mypy
django-stubs
sphinx
sphinx-rtd-theme
recommonmark
pytest
bottle
stdeb

View file

@ -1 +0,0 @@
archivebox

View file

@ -1,172 +0,0 @@
"""
base32-crockford
================
A Python module implementing the alternate base32 encoding as described
by Douglas Crockford at: http://www.crockford.com/wrmg/base32.html.
He designed the encoding to:
* Be human and machine readable
* Be compact
* Be error resistant
* Be pronounceable
It uses a symbol set of 10 digits and 22 letters, excluding I, L O and
U. Decoding is not case sensitive, and 'i' and 'l' are converted to '1'
and 'o' is converted to '0'. Encoding uses only upper-case characters.
Hyphens may be present in symbol strings to improve readability, and
are removed when decoding.
A check symbol can be appended to a symbol string to detect errors
within the string.
"""
import re
import sys
PY3 = sys.version_info[0] == 3
if not PY3:
import string as str
__all__ = ["encode", "decode", "normalize"]
if PY3:
string_types = (str,)
else:
string_types = (basestring,) # noqa
# The encoded symbol space does not include I, L, O or U
symbols = '0123456789ABCDEFGHJKMNPQRSTVWXYZ'
# These five symbols are exclusively for checksum values
check_symbols = '*~$=U'
encode_symbols = dict((i, ch) for (i, ch) in enumerate(symbols + check_symbols))
decode_symbols = dict((ch, i) for (i, ch) in enumerate(symbols + check_symbols))
normalize_symbols = str.maketrans('IiLlOo', '111100')
valid_symbols = re.compile('^[%s]+[%s]?$' % (symbols,
re.escape(check_symbols)))
base = len(symbols)
check_base = len(symbols + check_symbols)
def encode(number, checksum=False, split=0):
"""Encode an integer into a symbol string.
A ValueError is raised on invalid input.
If checksum is set to True, a check symbol will be
calculated and appended to the string.
If split is specified, the string will be divided into
clusters of that size separated by hyphens.
The encoded string is returned.
"""
number = int(number)
if number < 0:
raise ValueError("number '%d' is not a positive integer" % number)
split = int(split)
if split < 0:
raise ValueError("split '%d' is not a positive integer" % split)
check_symbol = ''
if checksum:
check_symbol = encode_symbols[number % check_base]
if number == 0:
return '0' + check_symbol
symbol_string = ''
while number > 0:
remainder = number % base
number //= base
symbol_string = encode_symbols[remainder] + symbol_string
symbol_string = symbol_string + check_symbol
if split:
chunks = []
for pos in range(0, len(symbol_string), split):
chunks.append(symbol_string[pos:pos + split])
symbol_string = '-'.join(chunks)
return symbol_string
def decode(symbol_string, checksum=False, strict=False):
"""Decode an encoded symbol string.
If checksum is set to True, the string is assumed to have a
trailing check symbol which will be validated. If the
checksum validation fails, a ValueError is raised.
If strict is set to True, a ValueError is raised if the
normalization step requires changes to the string.
The decoded string is returned.
"""
symbol_string = normalize(symbol_string, strict=strict)
if checksum:
symbol_string, check_symbol = symbol_string[:-1], symbol_string[-1]
number = 0
for symbol in symbol_string:
number = number * base + decode_symbols[symbol]
if checksum:
check_value = decode_symbols[check_symbol]
modulo = number % check_base
if check_value != modulo:
raise ValueError("invalid check symbol '%s' for string '%s'" %
(check_symbol, symbol_string))
return number
def normalize(symbol_string, strict=False):
"""Normalize an encoded symbol string.
Normalization provides error correction and prepares the
string for decoding. These transformations are applied:
1. Hyphens are removed
2. 'I', 'i', 'L' or 'l' are converted to '1'
3. 'O' or 'o' are converted to '0'
4. All characters are converted to uppercase
A TypeError is raised if an invalid string type is provided.
A ValueError is raised if the normalized string contains
invalid characters.
If the strict parameter is set to True, a ValueError is raised
if any of the above transformations are applied.
The normalized string is returned.
"""
if isinstance(symbol_string, string_types):
if not PY3:
try:
symbol_string = symbol_string.encode('ascii')
except UnicodeEncodeError:
raise ValueError("string should only contain ASCII characters")
else:
raise TypeError("string is of invalid type %s" %
symbol_string.__class__.__name__)
norm_string = symbol_string.replace('-', '').translate(normalize_symbols).upper()
if not valid_symbols.match(norm_string):
raise ValueError("string '%s' contains invalid characters" % norm_string)
if strict and norm_string != symbol_string:
raise ValueError("string '%s' requires normalization" % symbol_string)
return norm_string

View file

@ -19,6 +19,8 @@ meta_cmds = ('help', 'version')
main_cmds = ('init', 'info', 'config')
archive_cmds = ('add', 'remove', 'update', 'list', 'status')
fake_db = ("oneshot",)
display_first = (*meta_cmds, *main_cmds, *archive_cmds)
# every imported command module must have these properties in order to be valid
@ -59,6 +61,10 @@ def run_subcommand(subcommand: str,
pwd: Union[Path, str, None]=None) -> None:
"""Run a given ArchiveBox subcommand with the given list of args"""
if subcommand not in meta_cmds:
from ..config import setup_django
setup_django(in_memory_db=subcommand in fake_db, check_db=subcommand in archive_cmds)
module = import_module('.archivebox_{}'.format(subcommand), __package__)
module.main(args=subcommand_args, stdin=stdin, pwd=pwd) # type: ignore
@ -134,3 +140,5 @@ __all__ = (
'run_subcommand',
*SUBCOMMANDS.keys(),
)

View file

@ -89,8 +89,8 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
index_only=command.index_only,
overwrite=command.overwrite,
init=command.init,
out_dir=pwd or OUTPUT_DIR,
extractors=command.extract,
out_dir=pwd or OUTPUT_DIR,
)

View file

@ -98,7 +98,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
parser.add_argument(
'--filter-type',
type=str,
choices=('exact', 'substring', 'domain', 'regex','tag'),
choices=('exact', 'substring', 'domain', 'regex', 'tag', 'search'),
default='exact',
help='Type of pattern matching to use when filtering URLs',
)

View file

@ -36,6 +36,13 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
' ~/Desktop/sites_list.csv\n'
)
)
parser.add_argument(
"--extract",
type=str,
help="Pass a list of the extractors to be used. If the method name is not correct, it will be ignored. \
This does not take precedence over the configuration",
default=""
)
parser.add_argument(
'--out-dir',
type=str,
@ -55,6 +62,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
oneshot(
url=stdin_url or url,
out_dir=Path(command.out_dir).resolve(),
extractors=command.extract,
)

View file

@ -91,7 +91,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
parser.add_argument(
'--filter-type',
type=str,
choices=('exact', 'substring', 'domain', 'regex'),
choices=('exact', 'substring', 'domain', 'regex', 'tag', 'search'),
default='exact',
help='Type of pattern matching to use when filtering URLs',
)
@ -102,6 +102,13 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
default=None,
help='Update only URLs matching these filter patterns.'
)
parser.add_argument(
"--extract",
type=str,
help="Pass a list of the extractors to be used. If the method name is not correct, it will be ignored. \
This does not take precedence over the configuration",
default=""
)
command = parser.parse_args(args or ())
filter_patterns_str = accept_stdin(stdin)
@ -117,6 +124,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
after=command.after,
before=command.before,
out_dir=pwd or OUTPUT_DIR,
extractors=command.extract,
)

View file

@ -1,3 +1,24 @@
"""
ArchiveBox config definitons (including defaults and dynamic config options).
Config Usage Example:
archivebox config --set MEDIA_TIMEOUT=600
env MEDIA_TIMEOUT=600 USE_COLOR=False ... archivebox [subcommand] ...
Config Precedence Order:
1. cli args (--update-all / --index-only / etc.)
2. shell environment vars (env USE_COLOR=False archivebox add '...')
3. config file (echo "SAVE_FAVICON=False" >> ArchiveBox.conf)
4. defaults (defined below in Python)
Documentation:
https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration
"""
__package__ = 'archivebox'
import os
@ -24,26 +45,9 @@ from .config_stubs import (
ConfigDefaultDict,
)
# precedence order for config:
# 1. cli args (e.g. )
# 2. shell environment vars (env USE_COLOR=False archivebox add '...')
# 3. config file (echo "SAVE_FAVICON=False" >> ArchiveBox.conf)
# 4. defaults (defined below in Python)
############################### Config Schema ##################################
#
# env SHOW_PROGRESS=1 archivebox add '...'
# archivebox config --set TIMEOUT=600
#
# ******************************************************************************
# Documentation: https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration
# Use the 'env' command to pass config options to ArchiveBox. e.g.:
# env USE_COLOR=True CHROME_BINARY=chromium archivebox add < example.html
# ******************************************************************************
################################# User Config ##################################
CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'SHELL_CONFIG': {
'IS_TTY': {'type': bool, 'default': lambda _: sys.stdout.isatty()},
'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']},
@ -139,6 +143,18 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
'GIT_ARGS': {'type': list, 'default': ['--recursive']},
},
'SEARCH_BACKEND_CONFIG' : {
'USE_INDEXING_BACKEND': {'type': bool, 'default': True},
'USE_SEARCHING_BACKEND': {'type': bool, 'default': True},
'SEARCH_BACKEND_ENGINE': {'type': str, 'default': 'ripgrep'},
'SEARCH_BACKEND_HOST_NAME': {'type': str, 'default': 'localhost'},
'SEARCH_BACKEND_PORT': {'type': int, 'default': 1491},
'SEARCH_BACKEND_PASSWORD': {'type': str, 'default': 'SecretPassword'},
# SONIC
'SONIC_COLLECTION': {'type': str, 'default': 'archivebox'},
'SONIC_BUCKET': {'type': str, 'default': 'snapshots'},
},
'DEPENDENCY_CONFIG': {
'USE_CURL': {'type': bool, 'default': True},
'USE_WGET': {'type': bool, 'default': True},
@ -149,7 +165,8 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
'USE_CHROME': {'type': bool, 'default': True},
'USE_NODE': {'type': bool, 'default': True},
'USE_YOUTUBEDL': {'type': bool, 'default': True},
'USE_RIPGREP': {'type': bool, 'default': True},
'CURL_BINARY': {'type': str, 'default': 'curl'},
'GIT_BINARY': {'type': str, 'default': 'git'},
'WGET_BINARY': {'type': str, 'default': 'wget'},
@ -158,25 +175,48 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
'MERCURY_BINARY': {'type': str, 'default': 'mercury-parser'},
'YOUTUBEDL_BINARY': {'type': str, 'default': 'youtube-dl'},
'NODE_BINARY': {'type': str, 'default': 'node'},
'RIPGREP_BINARY': {'type': str, 'default': 'rg'},
'CHROME_BINARY': {'type': str, 'default': None},
'POCKET_CONSUMER_KEY': {'type': str, 'default': None},
'POCKET_ACCESS_TOKENS': {'type': dict, 'default': {}},
},
}
########################## Backwards-Compatibility #############################
# for backwards compatibility with old config files, check old/deprecated names for each key
CONFIG_ALIASES = {
alias: key
for section in CONFIG_DEFAULTS.values()
for section in CONFIG_SCHEMA.values()
for key, default in section.items()
for alias in default.get('aliases', ())
}
USER_CONFIG = {key for section in CONFIG_DEFAULTS.values() for key in section.keys()}
USER_CONFIG = {key for section in CONFIG_SCHEMA.values() for key in section.keys()}
def get_real_name(key: str) -> str:
"""get the current canonical name for a given deprecated config key"""
return CONFIG_ALIASES.get(key.upper().strip(), key.upper().strip())
############################## Derived Config ##############################
# Constants
################################ Constants #####################################
PACKAGE_DIR_NAME = 'archivebox'
TEMPLATES_DIR_NAME = 'themes'
ARCHIVE_DIR_NAME = 'archive'
SOURCES_DIR_NAME = 'sources'
LOGS_DIR_NAME = 'logs'
STATIC_DIR_NAME = 'static'
SQL_INDEX_FILENAME = 'index.sqlite3'
JSON_INDEX_FILENAME = 'index.json'
HTML_INDEX_FILENAME = 'index.html'
ROBOTS_TXT_FILENAME = 'robots.txt'
FAVICON_FILENAME = 'favicon.ico'
CONFIG_FILENAME = 'ArchiveBox.conf'
DEFAULT_CLI_COLORS = {
'reset': '\033[00;00m',
@ -225,42 +265,18 @@ STATICFILE_EXTENSIONS = {
# html, htm, shtml, xhtml, xml, aspx, php, cgi
}
PACKAGE_DIR_NAME = 'archivebox'
TEMPLATES_DIR_NAME = 'themes'
ARCHIVE_DIR_NAME = 'archive'
SOURCES_DIR_NAME = 'sources'
LOGS_DIR_NAME = 'logs'
STATIC_DIR_NAME = 'static'
SQL_INDEX_FILENAME = 'index.sqlite3'
JSON_INDEX_FILENAME = 'index.json'
HTML_INDEX_FILENAME = 'index.html'
ROBOTS_TXT_FILENAME = 'robots.txt'
FAVICON_FILENAME = 'favicon.ico'
CONFIG_FILENAME = 'ArchiveBox.conf'
CONFIG_HEADER = (
"""# This is the config file for your ArchiveBox collection.
#
# You can add options here manually in INI format, or automatically by running:
# archivebox config --set KEY=VALUE
#
# If you modify this file manually, make sure to update your archive after by running:
# archivebox init
#
# A list of all possible config with documentation and examples can be found here:
# https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration
""")
DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
############################## Derived Config ##################################
DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
'TERM_WIDTH': {'default': lambda c: lambda: shutil.get_terminal_size((100, 10)).columns},
'USER': {'default': lambda c: getpass.getuser() or os.getlogin()},
'ANSI': {'default': lambda c: DEFAULT_CLI_COLORS if c['USE_COLOR'] else {k: '' for k in DEFAULT_CLI_COLORS.keys()}},
'PACKAGE_DIR': {'default': lambda c: Path(__file__).resolve().parent},
'TEMPLATES_DIR': {'default': lambda c: c['PACKAGE_DIR'] / TEMPLATES_DIR_NAME / 'legacy'},
'TEMPLATES_DIR': {'default': lambda c: c['PACKAGE_DIR'] / TEMPLATES_DIR_NAME},
'OUTPUT_DIR': {'default': lambda c: Path(c['OUTPUT_DIR']).resolve() if c['OUTPUT_DIR'] else Path(os.curdir).resolve()},
'ARCHIVE_DIR': {'default': lambda c: c['OUTPUT_DIR'] / ARCHIVE_DIR_NAME},
@ -297,6 +313,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
'SAVE_WARC': {'default': lambda c: c['USE_WGET'] and c['SAVE_WARC']},
'WGET_ARGS': {'default': lambda c: c['WGET_ARGS'] or []},
'RIPGREP_VERSION': {'default': lambda c: bin_version(c['RIPGREP_BINARY']) if c['USE_RIPGREP'] else None},
'USE_SINGLEFILE': {'default': lambda c: c['USE_SINGLEFILE'] and c['SAVE_SINGLEFILE']},
'SINGLEFILE_VERSION': {'default': lambda c: bin_version(c['SINGLEFILE_BINARY']) if c['USE_SINGLEFILE'] else None},
@ -305,7 +322,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
'READABILITY_VERSION': {'default': lambda c: bin_version(c['READABILITY_BINARY']) if c['USE_READABILITY'] else None},
'USE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['SAVE_MERCURY']},
'MERCURY_VERSION': {'default': lambda c: '1.0.0' if (c['USE_MERCURY'] and c['MERCURY_BINARY']) else None}, # mercury is unversioned
'MERCURY_VERSION': {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None}, # mercury is unversioned
'USE_GIT': {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']},
'GIT_VERSION': {'default': lambda c: bin_version(c['GIT_BINARY']) if c['USE_GIT'] else None},
@ -319,8 +336,6 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
'USE_CHROME': {'default': lambda c: c['USE_CHROME'] and (c['SAVE_PDF'] or c['SAVE_SCREENSHOT'] or c['SAVE_DOM'] or c['SAVE_SINGLEFILE'])},
'CHROME_BINARY': {'default': lambda c: c['CHROME_BINARY'] if c['CHROME_BINARY'] else find_chrome_binary()},
'CHROME_VERSION': {'default': lambda c: bin_version(c['CHROME_BINARY']) if c['USE_CHROME'] else None},
'USE_NODE': {'default': lambda c: c['USE_NODE'] and (c['SAVE_READABILITY'] or c['SAVE_SINGLEFILE'])},
'NODE_VERSION': {'default': lambda c: bin_version(c['NODE_BINARY']) if c['USE_NODE'] else None},
'SAVE_PDF': {'default': lambda c: c['USE_CHROME'] and c['SAVE_PDF']},
'SAVE_SCREENSHOT': {'default': lambda c: c['USE_CHROME'] and c['SAVE_SCREENSHOT']},
@ -328,6 +343,9 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
'SAVE_SINGLEFILE': {'default': lambda c: c['USE_CHROME'] and c['SAVE_SINGLEFILE'] and c['USE_NODE']},
'SAVE_READABILITY': {'default': lambda c: c['USE_READABILITY'] and c['USE_NODE']},
'SAVE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['USE_NODE']},
'USE_NODE': {'default': lambda c: c['USE_NODE'] and (c['SAVE_READABILITY'] or c['SAVE_SINGLEFILE'] or c['SAVE_MERCURY'])},
'NODE_VERSION': {'default': lambda c: bin_version(c['NODE_BINARY']) if c['USE_NODE'] else None},
'DEPENDENCIES': {'default': lambda c: get_dependency_info(c)},
'CODE_LOCATIONS': {'default': lambda c: get_code_locations(c)},
@ -340,6 +358,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
################################### Helpers ####################################
def load_config_val(key: str,
default: ConfigDefaultValue=None,
type: Optional[Type]=None,
@ -386,7 +405,7 @@ def load_config_val(key: str,
raise ValueError(f'Invalid configuration option {key}={val} (expected an integer)')
return int(val)
elif type is list:
elif type is list or type is dict:
return json.loads(val)
raise Exception('Config values can only be str, bool, int or json')
@ -418,6 +437,20 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
from .system import atomic_write
CONFIG_HEADER = (
"""# This is the config file for your ArchiveBox collection.
#
# You can add options here manually in INI format, or automatically by running:
# archivebox config --set KEY=VALUE
#
# If you modify this file manually, make sure to update your archive after by running:
# archivebox init
#
# A list of all possible config with documentation and examples can be found here:
# https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration
""")
out_dir = out_dir or Path(os.getenv('OUTPUT_DIR', '.')).resolve()
config_path = Path(out_dir) / CONFIG_FILENAME
@ -431,7 +464,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
with open(config_path, 'r') as old:
atomic_write(f'{config_path}.bak', old.read())
find_section = lambda key: [name for name, opts in CONFIG_DEFAULTS.items() if key in opts][0]
find_section = lambda key: [name for name, opts in CONFIG_SCHEMA.items() if key in opts][0]
# Set up sections in empty config file
for key, val in config.items():
@ -520,6 +553,8 @@ def load_config(defaults: ConfigDefaultDict,
# with open(os.path.join(config['OUTPUT_DIR'], CONFIG_FILENAME), 'w+') as f:
# Logging Helpers
def stdout(*args, color: Optional[str]=None, prefix: str='', config: Optional[ConfigDict]=None) -> None:
ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
@ -551,6 +586,7 @@ def hint(text: Union[Tuple[str, ...], List[str], str], prefix=' ', config: Op
stderr('{} {}'.format(prefix, line))
# Dependency Metadata Helpers
def bin_version(binary: Optional[str]) -> Optional[str]:
"""check the presence and return valid version line of a specified binary"""
@ -580,7 +616,7 @@ def bin_path(binary: Optional[str]) -> Optional[str]:
if node_modules_bin.exists():
return str(node_modules_bin.resolve())
return shutil.which(Path(binary).expanduser()) or binary
return shutil.which(str(Path(binary).expanduser())) or shutil.which(str(binary)) or binary
def bin_hash(binary: Optional[str]) -> Optional[str]:
if binary is None:
@ -667,7 +703,7 @@ def get_code_locations(config: ConfigDict) -> SimpleConfigValueDict:
'TEMPLATES_DIR': {
'path': (config['TEMPLATES_DIR']).resolve(),
'enabled': True,
'is_valid': (config['TEMPLATES_DIR'] / 'static').exists(),
'is_valid': (config['TEMPLATES_DIR'] / config['ACTIVE_THEME'] / 'static').exists(),
},
# 'NODE_MODULES_DIR': {
# 'path': ,
@ -811,6 +847,21 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
'enabled': config['USE_CHROME'],
'is_valid': bool(config['CHROME_VERSION']),
},
'RIPGREP_BINARY': {
'path': bin_path(config['RIPGREP_BINARY']),
'version': config['RIPGREP_VERSION'],
'hash': bin_hash(config['RIPGREP_BINARY']),
'enabled': config['USE_RIPGREP'],
'is_valid': bool(config['RIPGREP_VERSION']),
},
# TODO: add an entry for the sonic search backend?
# 'SONIC_BINARY': {
# 'path': bin_path(config['SONIC_BINARY']),
# 'version': config['SONIC_VERSION'],
# 'hash': bin_hash(config['SONIC_BINARY']),
# 'enabled': config['USE_SONIC'],
# 'is_valid': bool(config['SONIC_VERSION']),
# },
}
def get_chrome_info(config: ConfigDict) -> ConfigValue:
@ -826,28 +877,51 @@ def get_chrome_info(config: ConfigDict) -> ConfigValue:
}
################################## Load Config #################################
# ******************************************************************************
# ******************************************************************************
# ******************************** Load Config *********************************
# ******* (compile the defaults, configs, and metadata all into CONFIG) ********
# ******************************************************************************
# ******************************************************************************
def load_all_config():
CONFIG: ConfigDict = {}
for section_name, section_config in CONFIG_DEFAULTS.items():
for section_name, section_config in CONFIG_SCHEMA.items():
CONFIG = load_config(section_config, CONFIG)
return load_config(DERIVED_CONFIG_DEFAULTS, CONFIG)
return load_config(DYNAMIC_CONFIG_SCHEMA, CONFIG)
# add all final config values in CONFIG to globals in this file
CONFIG = load_all_config()
globals().update(CONFIG)
# this lets us do: from .config import DEBUG, MEDIA_TIMEOUT, ...
# Timezone set as UTC
# ******************************************************************************
# ******************************************************************************
# ******************************************************************************
# ******************************************************************************
# ******************************************************************************
########################### System Environment Setup ###########################
# Set timezone to UTC and umask to OUTPUT_PERMISSIONS
os.environ["TZ"] = 'UTC'
os.umask(0o777 - int(OUTPUT_PERMISSIONS, base=8)) # noqa: F821
# add ./node_modules/.bin to $PATH so we can use node scripts in extractors
NODE_BIN_PATH = str((Path(CONFIG["OUTPUT_DIR"]).absolute() / 'node_modules' / '.bin'))
sys.path.append(NODE_BIN_PATH)
############################## Importable Checkers #############################
########################### Config Validity Checkers ###########################
def check_system_config(config: ConfigDict=CONFIG) -> None:
### Check system environment
@ -936,7 +1010,7 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#save_media')
stderr()
def check_data_folder(out_dir: Optional[str]=None, config: ConfigDict=CONFIG) -> None:
def check_data_folder(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG) -> None:
output_dir = out_dir or config['OUTPUT_DIR']
assert isinstance(output_dir, (str, Path))
@ -976,7 +1050,7 @@ def check_data_folder(out_dir: Optional[str]=None, config: ConfigDict=CONFIG) ->
def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG) -> None:
def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None:
check_system_config()
output_dir = out_dir or Path(config['OUTPUT_DIR'])
@ -989,7 +1063,15 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG)
os.environ.setdefault('OUTPUT_DIR', str(output_dir))
assert (config['PACKAGE_DIR'] / 'core' / 'settings.py').exists(), 'settings.py was not found at archivebox/core/settings.py'
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
django.setup()
if in_memory_db:
# Put the db in memory and run migrations in case any command requires it
from django.core.management import call_command
os.environ.setdefault("ARCHIVEBOX_DATABASE_NAME", ":memory:")
django.setup()
call_command("migrate", interactive=False, verbosity=0)
else:
django.setup()
if check_db:
sql_index_path = Path(output_dir) / SQL_INDEX_FILENAME
@ -997,5 +1079,3 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG)
f'No database file {SQL_INDEX_FILENAME} found in OUTPUT_DIR: {config["OUTPUT_DIR"]}')
except KeyboardInterrupt:
raise SystemExit(2)
os.umask(0o777 - int(OUTPUT_PERMISSIONS, base=8)) # noqa: F821

View file

@ -13,8 +13,10 @@ from django import forms
from core.models import Snapshot, Tag
from core.forms import AddLinkForm, TagField
from core.utils import get_icons
from core.mixins import SearchResultsAdminMixin
from index.html import snapshot_icons
from util import htmldecode, urldecode, ansi_to_html
from logging_util import printable_filesize
from main import add, remove
@ -82,7 +84,7 @@ class SnapshotAdminForm(forms.ModelForm):
return instance
class SnapshotAdmin(admin.ModelAdmin):
class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
list_display = ('added', 'title_str', 'url_str', 'files', 'size')
sort_fields = ('title_str', 'url_str', 'added')
readonly_fields = ('id', 'url', 'timestamp', 'num_outputs', 'is_archived', 'url_hash', 'added', 'updated')
@ -94,6 +96,13 @@ class SnapshotAdmin(admin.ModelAdmin):
actions_template = 'admin/actions_as_select.html'
form = SnapshotAdminForm
def get_urls(self):
urls = super().get_urls()
custom_urls = [
path('grid/', self.admin_site.admin_view(self.grid_view),name='grid')
]
return custom_urls + urls
def get_queryset(self, request):
return super().get_queryset(request).prefetch_related('tags')
@ -128,7 +137,7 @@ class SnapshotAdmin(admin.ModelAdmin):
) + mark_safe(f' <span class="tags">{tags}</span>')
def files(self, obj):
return get_icons(obj)
return snapshot_icons(obj)
def size(self, obj):
archive_size = obj.archive_size
@ -151,6 +160,31 @@ class SnapshotAdmin(admin.ModelAdmin):
obj.url.split('://www.', 1)[-1].split('://', 1)[-1][:64],
)
def grid_view(self, request):
# cl = self.get_changelist_instance(request)
# Save before monkey patching to restore for changelist list view
saved_change_list_template = self.change_list_template
saved_list_per_page = self.list_per_page
saved_list_max_show_all = self.list_max_show_all
# Monkey patch here plus core_tags.py
self.change_list_template = 'admin/grid_change_list.html'
self.list_per_page = 20
self.list_max_show_all = self.list_per_page
# Call monkey patched view
rendered_response = self.changelist_view(request)
# Restore values
self.change_list_template = saved_change_list_template
self.list_per_page = saved_list_per_page
self.list_max_show_all = saved_list_max_show_all
return rendered_response
id_str.short_description = 'ID'
title_str.short_description = 'Title'
url_str.short_description = 'Original URL'
@ -216,7 +250,6 @@ class ArchiveBoxAdmin(admin.AdminSite):
return render(template_name='add_links.html', request=request, context=context)
admin.site = ArchiveBoxAdmin()
admin.site.register(get_user_model())
admin.site.register(Snapshot, SnapshotAdmin)

View file

@ -3,18 +3,29 @@ __package__ = 'archivebox.core'
from django import forms
from ..util import URL_REGEX
from .utils_taggit import edit_string_for_tags, parse_tags
from ..vendor.taggit_utils import edit_string_for_tags, parse_tags
CHOICES = (
('0', 'depth = 0 (archive just these URLs)'),
('1', 'depth = 1 (archive these URLs and all URLs one hop away)'),
)
from ..extractors import get_default_archive_methods
ARCHIVE_METHODS = [
(name, name)
for name, _, _ in get_default_archive_methods()
]
class AddLinkForm(forms.Form):
url = forms.RegexField(label="URLs (one per line)", regex=URL_REGEX, min_length='6', strip=True, widget=forms.Textarea, required=True)
depth = forms.ChoiceField(label="Archive depth", choices=CHOICES, widget=forms.RadioSelect, initial='0')
archive_methods = forms.MultipleChoiceField(
required=False,
widget=forms.SelectMultiple,
choices=ARCHIVE_METHODS,
)
class TagWidgetMixin:
def format_value(self, value):
if value is not None and not isinstance(value, str):

View file

@ -0,0 +1,97 @@
# Generated by Django 3.0.8 on 2020-11-04 12:25
import json
from pathlib import Path
from django.db import migrations, models
import django.db.models.deletion
from config import CONFIG
from index.json import to_json
try:
JSONField = models.JSONField
except AttributeError:
import jsonfield
JSONField = jsonfield.JSONField
def forwards_func(apps, schema_editor):
from core.models import EXTRACTORS
Snapshot = apps.get_model("core", "Snapshot")
ArchiveResult = apps.get_model("core", "ArchiveResult")
snapshots = Snapshot.objects.all()
for snapshot in snapshots:
out_dir = Path(CONFIG['ARCHIVE_DIR']) / snapshot.timestamp
try:
with open(out_dir / "index.json", "r") as f:
fs_index = json.load(f)
except Exception as e:
continue
history = fs_index["history"]
for extractor in history:
for result in history[extractor]:
ArchiveResult.objects.create(extractor=extractor, snapshot=snapshot, cmd=result["cmd"], cmd_version=result["cmd_version"],
start_ts=result["start_ts"], end_ts=result["end_ts"], status=result["status"], pwd=result["pwd"], output=result["output"])
def verify_json_index_integrity(snapshot):
results = snapshot.archiveresult_set.all()
out_dir = Path(CONFIG['ARCHIVE_DIR']) / snapshot.timestamp
with open(out_dir / "index.json", "r") as f:
index = json.load(f)
history = index["history"]
index_results = [result for extractor in history for result in history[extractor]]
flattened_results = [result["start_ts"] for result in index_results]
missing_results = [result for result in results if result.start_ts.isoformat() not in flattened_results]
for missing in missing_results:
index["history"][missing.extractor].append({"cmd": missing.cmd, "cmd_version": missing.cmd_version, "end_ts": missing.end_ts.isoformat(),
"start_ts": missing.start_ts.isoformat(), "pwd": missing.pwd, "output": missing.output,
"schema": "ArchiveResult", "status": missing.status})
json_index = to_json(index)
with open(out_dir / "index.json", "w") as f:
f.write(json_index)
def reverse_func(apps, schema_editor):
Snapshot = apps.get_model("core", "Snapshot")
ArchiveResult = apps.get_model("core", "ArchiveResult")
for snapshot in Snapshot.objects.all():
verify_json_index_integrity(snapshot)
ArchiveResult.objects.all().delete()
class Migration(migrations.Migration):
dependencies = [
('core', '0006_auto_20201012_1520'),
]
operations = [
migrations.CreateModel(
name='ArchiveResult',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('cmd', JSONField()),
('pwd', models.CharField(max_length=256)),
('cmd_version', models.CharField(max_length=32)),
('status', models.CharField(choices=[('succeeded', 'succeeded'), ('failed', 'failed'), ('skipped', 'skipped')], max_length=16)),
('output', models.CharField(max_length=512)),
('start_ts', models.DateTimeField()),
('end_ts', models.DateTimeField()),
('extractor', models.CharField(choices=[('title', 'title'), ('favicon', 'favicon'), ('wget', 'wget'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('readability', 'readability'), ('mercury', 'mercury'), ('git', 'git'), ('media', 'media'), ('headers', 'headers'), ('archive_org', 'archive_org')], max_length=32)),
('snapshot', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='core.Snapshot')),
],
),
migrations.RunPython(forwards_func, reverse_func),
]

23
archivebox/core/mixins.py Normal file
View file

@ -0,0 +1,23 @@
from django.contrib import messages
from archivebox.search import query_search_index
class SearchResultsAdminMixin(object):
def get_search_results(self, request, queryset, search_term):
''' Enhances the search queryset with results from the search backend.
'''
qs, use_distinct = \
super(SearchResultsAdminMixin, self).get_search_results(
request, queryset, search_term)
search_term = search_term.strip()
if not search_term:
return qs, use_distinct
try:
qsearch = query_search_index(search_term)
except Exception as err:
messages.add_message(request, messages.WARNING, f'Error from the search backend, only showing results from default admin search fields - Error: {err}')
else:
qs = queryset & qsearch
finally:
return qs, use_distinct

View file

@ -5,9 +5,24 @@ import uuid
from django.db import models, transaction
from django.utils.functional import cached_property
from django.utils.text import slugify
from django.db.models import Case, When, Value, IntegerField
from ..util import parse_date
from ..index.schema import Link
from ..extractors import get_default_archive_methods, ARCHIVE_METHODS_INDEXING_PRECEDENCE
EXTRACTORS = [(extractor[0], extractor[0]) for extractor in get_default_archive_methods()]
STATUS_CHOICES = [
("succeeded", "succeeded"),
("failed", "failed"),
("skipped", "skipped")
]
try:
JSONField = models.JSONField
except AttributeError:
import jsonfield
JSONField = jsonfield.JSONField
class Tag(models.Model):
@ -51,6 +66,7 @@ class Tag(models.Model):
else:
return super().save(*args, **kwargs)
class Snapshot(models.Model):
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
@ -83,7 +99,7 @@ class Snapshot(models.Model):
return {
key: getattr(self, key)
if key != 'tags' else self.tags_str()
for key in args
for key in args
}
def as_link(self) -> Link:
@ -92,7 +108,7 @@ class Snapshot(models.Model):
def as_link_with_details(self) -> Link:
from ..index import load_link_details
return load_link_details(self.as_link())
def tags_str(self) -> str:
return ','.join(self.tags.order_by('name').values_list('name', flat=True))
@ -106,7 +122,7 @@ class Snapshot(models.Model):
@cached_property
def num_outputs(self):
return self.as_link().num_outputs
return self.archiveresult_set.filter(status='succeeded').count()
@cached_property
def url_hash(self):
@ -130,8 +146,8 @@ class Snapshot(models.Model):
@cached_property
def history(self):
from ..index import load_link_details
return load_link_details(self.as_link()).history
# TODO: use ArchiveResult for this instead of json
return self.as_link_with_details().history
@cached_property
def latest_title(self):
@ -142,9 +158,37 @@ class Snapshot(models.Model):
return self.history['title'][-1].output.strip()
return None
def save_tags(self, tags=[]):
def save_tags(self, tags=()):
tags_id = []
for tag in tags:
tags_id.append(Tag.objects.get_or_create(name=tag)[0].id)
self.tags.clear()
self.tags.add(*tags_id)
class ArchiveResultManager(models.Manager):
def indexable(self, sorted: bool = True):
INDEXABLE_METHODS = [ r[0] for r in ARCHIVE_METHODS_INDEXING_PRECEDENCE ]
qs = self.get_queryset().filter(extractor__in=INDEXABLE_METHODS,status='succeeded')
if sorted:
precedence = [ When(extractor=method, then=Value(precedence)) for method, precedence in ARCHIVE_METHODS_INDEXING_PRECEDENCE ]
qs = qs.annotate(indexing_precedence=Case(*precedence, default=Value(1000),output_field=IntegerField())).order_by('indexing_precedence')
return qs
class ArchiveResult(models.Model):
snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
cmd = JSONField()
pwd = models.CharField(max_length=256)
cmd_version = models.CharField(max_length=32)
output = models.CharField(max_length=512)
start_ts = models.DateTimeField()
end_ts = models.DateTimeField()
status = models.CharField(max_length=16, choices=STATUS_CHOICES)
extractor = models.CharField(choices=EXTRACTORS, max_length=32)
objects = ArchiveResultManager()
def __str__(self):
return self.extractor

View file

@ -12,6 +12,7 @@ from ..config import (
ALLOWED_HOSTS,
PACKAGE_DIR,
ACTIVE_THEME,
TEMPLATES_DIR_NAME,
SQL_INDEX_FILENAME,
OUTPUT_DIR,
)
@ -68,14 +69,14 @@ AUTHENTICATION_BACKENDS = [
STATIC_URL = '/static/'
STATICFILES_DIRS = [
str(Path(PACKAGE_DIR) / 'themes' / ACTIVE_THEME / 'static'),
str(Path(PACKAGE_DIR) / 'themes' / 'default' / 'static'),
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / ACTIVE_THEME / 'static'),
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'default' / 'static'),
]
TEMPLATE_DIRS = [
str(Path(PACKAGE_DIR) / 'themes' / ACTIVE_THEME),
str(Path(PACKAGE_DIR) / 'themes' / 'default'),
str(Path(PACKAGE_DIR) / 'themes'),
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / ACTIVE_THEME),
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'default'),
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME),
]
TEMPLATES = [
@ -100,10 +101,12 @@ TEMPLATES = [
################################################################################
DATABASE_FILE = Path(OUTPUT_DIR) / SQL_INDEX_FILENAME
DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", DATABASE_FILE)
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.sqlite3',
'NAME': str(DATABASE_FILE),
'NAME': DATABASE_NAME,
}
}

View file

View file

@ -0,0 +1,47 @@
from django import template
from django.urls import reverse
from django.contrib.admin.templatetags.base import InclusionAdminNode
from django.templatetags.static import static
from typing import Union
from core.models import ArchiveResult
register = template.Library()
@register.simple_tag
def snapshot_image(snapshot):
result = ArchiveResult.objects.filter(snapshot=snapshot, extractor='screenshot', status='succeeded').first()
if result:
return reverse('LinkAssets', args=[f'{str(snapshot.timestamp)}/{result.output}'])
return static('archive.png')
@register.filter
def file_size(num_bytes: Union[int, float]) -> str:
for count in ['Bytes','KB','MB','GB']:
if num_bytes > -1024.0 and num_bytes < 1024.0:
return '%3.1f %s' % (num_bytes, count)
num_bytes /= 1024.0
return '%3.1f %s' % (num_bytes, 'TB')
def result_list(cl):
"""
Monkey patched result
"""
num_sorted_fields = 0
return {
'cl': cl,
'num_sorted_fields': num_sorted_fields,
'results': cl.result_list,
}
@register.tag(name='snapshots_grid')
def result_list_tag(parser, token):
return InclusionAdminNode(
parser, token,
func=result_list,
template_name='snapshots_grid.html',
takes_context=False,
)

View file

@ -1,39 +0,0 @@
from pathlib import Path
from django.utils.html import format_html
from core.models import Snapshot
def get_icons(snapshot: Snapshot) -> str:
link = snapshot.as_link()
canon = link.canonical_outputs()
out_dir = Path(link.link_dir)
# slow version: highlights icons based on whether files exist or not for that output
# link_tuple = lambda link, method: (link.archive_path, canon[method] or '', canon[method] and (out_dir / (canon[method] or 'notdone')).exists())
# fast version: all icons are highlighted without checking for outputs in filesystem
link_tuple = lambda link, method: (link.archive_path, canon[method] or '', canon[method] and (out_dir / (canon[method] or 'notdone')).exists())
return format_html(
'<span class="files-icons" style="font-size: 1.2em; opacity: 0.8">'
'<a href="/{}/{}" class="exists-{}" title="SingleFile">❶ </a>'
'<a href="/{}/{}" class="exists-{}" title="Wget clone">🆆 </a> '
'<a href="/{}/{}" class="exists-{}" title="HTML dump">🅷 </a> '
'<a href="/{}/{}" class="exists-{}" title="PDF">📄 </a> '
'<a href="/{}/{}" class="exists-{}" title="Screenshot">💻 </a> '
'<a href="/{}/{}" class="exists-{}" title="WARC">📦 </a> '
'<a href="/{}/{}/" class="exists-{}" title="Media files">📼 </a> '
'<a href="/{}/{}/" class="exists-{}" title="Git repos">🅶 </a> '
'<a href="{}" class="exists-{}" title="Archive.org snapshot">🏛 </a> '
'</span>',
*link_tuple(link, 'singlefile_path'),
*link_tuple(link, 'wget_path')[:2], any((out_dir / link.domain).glob('*')),
*link_tuple(link, 'dom_path'),
*link_tuple(link, 'pdf_path'),
*link_tuple(link, 'screenshot_path'),
*link_tuple(link, 'warc_path')[:2], any((out_dir / canon['warc_path']).glob('*.warc.gz')),
*link_tuple(link, 'media_path')[:2], any((out_dir / canon['media_path']).glob('*')),
*link_tuple(link, 'git_path')[:2], any((out_dir / canon['git_path']).glob('*')),
canon['archive_org_path'], (out_dir / 'archive.org.txt').exists(),
)

View file

@ -1,113 +0,0 @@
# Taken from https://github.com/jazzband/django-taggit/blob/3b56adb637ab95aca5036c37a358402c825a367c/taggit/utils.py
def parse_tags(tagstring):
"""
Parses tag input, with multiple word input being activated and
delineated by commas and double quotes. Quotes take precedence, so
they may contain commas.
Returns a sorted list of unique tag names.
Ported from Jonathan Buchanan's `django-tagging
<http://django-tagging.googlecode.com/>`_
"""
if not tagstring:
return []
# Special case - if there are no commas or double quotes in the
# input, we don't *do* a recall... I mean, we know we only need to
# split on spaces.
if "," not in tagstring and '"' not in tagstring:
words = list(set(split_strip(tagstring, " ")))
words.sort()
return words
words = []
buffer = []
# Defer splitting of non-quoted sections until we know if there are
# any unquoted commas.
to_be_split = []
saw_loose_comma = False
open_quote = False
i = iter(tagstring)
try:
while True:
c = next(i)
if c == '"':
if buffer:
to_be_split.append("".join(buffer))
buffer = []
# Find the matching quote
open_quote = True
c = next(i)
while c != '"':
buffer.append(c)
c = next(i)
if buffer:
word = "".join(buffer).strip()
if word:
words.append(word)
buffer = []
open_quote = False
else:
if not saw_loose_comma and c == ",":
saw_loose_comma = True
buffer.append(c)
except StopIteration:
# If we were parsing an open quote which was never closed treat
# the buffer as unquoted.
if buffer:
if open_quote and "," in buffer:
saw_loose_comma = True
to_be_split.append("".join(buffer))
if to_be_split:
if saw_loose_comma:
delimiter = ","
else:
delimiter = " "
for chunk in to_be_split:
words.extend(split_strip(chunk, delimiter))
words = list(set(words))
words.sort()
return words
def split_strip(string, delimiter=","):
"""
Splits ``string`` on ``delimiter``, stripping each resulting string
and returning a list of non-empty strings.
Ported from Jonathan Buchanan's `django-tagging
<http://django-tagging.googlecode.com/>`_
"""
if not string:
return []
words = [w.strip() for w in string.split(delimiter)]
return [w for w in words if w]
def edit_string_for_tags(tags):
"""
Given list of ``Tag`` instances, creates a string representation of
the list suitable for editing by the user, such that submitting the
given string representation back without changing it will give the
same list of tags.
Tag names which contain commas will be double quoted.
If any tag name which isn't being quoted contains whitespace, the
resulting string of tag names will be comma-delimited, otherwise
it will be space-delimited.
Ported from Jonathan Buchanan's `django-tagging
<http://django-tagging.googlecode.com/>`_
"""
names = []
for tag in tags:
name = tag.name
if "," in name or " " in name:
names.append('"%s"' % name)
else:
names.append(name)
return ", ".join(sorted(names))

View file

@ -12,17 +12,19 @@ from django.views.generic import FormView
from django.contrib.auth.mixins import UserPassesTestMixin
from core.models import Snapshot
from core.utils import get_icons
from core.forms import AddLinkForm
from ..config import (
OUTPUT_DIR,
PUBLIC_INDEX,
PUBLIC_SNAPSHOTS,
PUBLIC_ADD_VIEW
PUBLIC_ADD_VIEW,
VERSION,
FOOTER_INFO,
)
from main import add
from ..util import base_url, ansi_to_html
from ..index.html import snapshot_icons
class MainIndex(View):
@ -94,13 +96,20 @@ class PublicArchiveView(ListView):
paginate_by = 100
ordering = ['title']
def get_context_data(self, **kwargs):
return {
**super().get_context_data(**kwargs),
'VERSION': VERSION,
'FOOTER_INFO': FOOTER_INFO,
}
def get_queryset(self, **kwargs):
qs = super().get_queryset(**kwargs)
query = self.request.GET.get('q')
if query:
qs = qs.filter(title__icontains=query)
for snapshot in qs:
snapshot.icons = get_icons(snapshot)
snapshot.icons = snapshot_icons(snapshot)
return qs
def get(self, *args, **kwargs):
@ -127,23 +136,29 @@ class AddView(UserPassesTestMixin, FormView):
def test_func(self):
return PUBLIC_ADD_VIEW or self.request.user.is_authenticated
def get_context_data(self, *args, **kwargs):
context = super().get_context_data(*args, **kwargs)
context["title"] = "Add URLs"
# We can't just call request.build_absolute_uri in the template, because it would include query parameters
context["absolute_add_path"] = self.request.build_absolute_uri(self.request.path)
return context
def get_context_data(self, **kwargs):
return {
**super().get_context_data(**kwargs),
'title': "Add URLs",
# We can't just call request.build_absolute_uri in the template, because it would include query parameters
'absolute_add_path': self.request.build_absolute_uri(self.request.path),
'VERSION': VERSION,
'FOOTER_INFO': FOOTER_INFO,
}
def form_valid(self, form):
url = form.cleaned_data["url"]
print(f'[+] Adding URL: {url}')
depth = 0 if form.cleaned_data["depth"] == "0" else 1
extractors = ','.join(form.cleaned_data["archive_methods"])
input_kwargs = {
"urls": url,
"depth": depth,
"update_all": False,
"out_dir": OUTPUT_DIR,
}
if extractors:
input_kwargs.update({"extractors": extractors})
add_stdout = StringIO()
with redirect_stdout(add_stdout):
add(**input_kwargs)

View file

@ -8,6 +8,7 @@ from datetime import datetime
from django.db.models import QuerySet
from ..index.schema import Link
from ..index.sql import write_link_to_sql_index
from ..index import (
load_link_details,
write_link_details,
@ -22,6 +23,7 @@ from ..logging_util import (
log_archive_method_started,
log_archive_method_finished,
)
from ..search import write_search_index
from .title import should_save_title, save_title
from .favicon import should_save_favicon, save_favicon
@ -37,6 +39,7 @@ from .media import should_save_media, save_media
from .archive_org import should_save_archive_dot_org, save_archive_dot_org
from .headers import should_save_headers, save_headers
def get_default_archive_methods():
return [
('title', should_save_title, save_title),
@ -54,6 +57,8 @@ def get_default_archive_methods():
('archive_org', should_save_archive_dot_org, save_archive_dot_org),
]
ARCHIVE_METHODS_INDEXING_PRECEDENCE = [('readability', 1), ('singlefile', 2), ('dom', 3), ('wget', 4)]
@enforce_types
def ignore_methods(to_ignore: List[str]):
ARCHIVE_METHODS = get_default_archive_methods()
@ -62,9 +67,16 @@ def ignore_methods(to_ignore: List[str]):
return list(methods)
@enforce_types
def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[str]]=None, out_dir: Optional[Path]=None, skip_index: bool=False) -> Link:
def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[str]]=None, out_dir: Optional[Path]=None) -> Link:
"""download the DOM, PDF, and a screenshot into a folder named after the link's timestamp"""
# TODO: Remove when the input is changed to be a snapshot. Suboptimal approach.
from core.models import Snapshot, ArchiveResult
try:
snapshot = Snapshot.objects.get(url=link.url) # TODO: This will be unnecessary once everything is a snapshot
except Snapshot.DoesNotExist:
snapshot = write_link_to_sql_index(link)
ARCHIVE_METHODS = get_default_archive_methods()
if methods:
@ -80,7 +92,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
os.makedirs(out_dir)
link = load_link_details(link, out_dir=out_dir)
write_link_details(link, out_dir=out_dir, skip_sql_index=skip_index)
write_link_details(link, out_dir=out_dir, skip_sql_index=False)
log_link_archiving_started(link, out_dir, is_new)
link = link.overwrite(updated=datetime.now())
stats = {'skipped': 0, 'succeeded': 0, 'failed': 0}
@ -99,6 +111,10 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
stats[result.status] += 1
log_archive_method_finished(result)
write_search_index(link=link, texts=result.index_texts)
ArchiveResult.objects.create(snapshot=snapshot, extractor=method_name, cmd=result.cmd, cmd_version=result.cmd_version,
output=result.output, pwd=result.pwd, start_ts=result.start_ts, end_ts=result.end_ts, status=result.status)
else:
# print('{black} X {}{reset}'.format(method_name, **ANSI))
stats['skipped'] += 1
@ -117,7 +133,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
except Exception:
pass
write_link_details(link, out_dir=out_dir, skip_sql_index=skip_index)
write_link_details(link, out_dir=out_dir, skip_sql_index=False)
log_link_archiving_finished(link, link.link_dir, is_new, stats)

View file

@ -71,6 +71,7 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
CURL_BINARY,
link.url
]
readability_content = None
timer = TimedProgress(timeout, prefix=' ')
try:
document = get_html(link, out_dir)
@ -86,8 +87,9 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
result = run(cmd, cwd=out_dir, timeout=timeout)
result_json = json.loads(result.stdout)
output_folder.mkdir(exist_ok=True)
readability_content = result_json.pop("textContent")
atomic_write(str(output_folder / "content.html"), result_json.pop("content"))
atomic_write(str(output_folder / "content.txt"), result_json.pop("textContent"))
atomic_write(str(output_folder / "content.txt"), readability_content)
atomic_write(str(output_folder / "article.json"), result_json)
# parse out number of files downloaded from last line of stderr:
@ -117,5 +119,6 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
cmd_version=READABILITY_VERSION,
output=output,
status=status,
**timer.stats,
index_texts= [readability_content] if readability_content else [],
**timer.stats,
)

View file

@ -20,7 +20,6 @@ from ..config import (
CURL_ARGS,
CURL_VERSION,
CURL_USER_AGENT,
setup_django,
)
from ..logging_util import TimedProgress
@ -81,7 +80,6 @@ def extract_title_with_regex(html):
def save_title(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
"""try to guess the page's title from its content"""
setup_django(out_dir=out_dir)
from core.models import Snapshot
output: ArchiveOutput = None

View file

@ -18,7 +18,6 @@ from ..util import (
ExtendedEncoder,
)
from ..config import (
setup_django,
ARCHIVE_DIR_NAME,
SQL_INDEX_FILENAME,
JSON_INDEX_FILENAME,
@ -51,6 +50,8 @@ from .sql import (
write_sql_link_details,
)
from ..search import search_backend_enabled, query_search_index
### Link filtering and checking
@enforce_types
@ -221,7 +222,7 @@ def timed_index_update(out_path: Path):
@enforce_types
def write_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, finished: bool=False) -> None:
def write_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None:
"""Writes links to sqlite3 file for a given list of links"""
log_indexing_process_started(len(links))
@ -241,16 +242,9 @@ def write_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, finished: bool
log_indexing_process_finished()
@enforce_types
def get_empty_snapshot_queryset(out_dir: Path=OUTPUT_DIR):
setup_django(out_dir, check_db=True)
from core.models import Snapshot
return Snapshot.objects.none()
@enforce_types
def load_main_index(out_dir: Path=OUTPUT_DIR, warn: bool=True) -> List[Link]:
"""parse and load existing index with any new links from import_path merged in"""
setup_django(out_dir, check_db=True)
from core.models import Snapshot
try:
return Snapshot.objects.all()
@ -365,7 +359,7 @@ LINK_FILTERS = {
}
@enforce_types
def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
def q_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
q_filter = Q()
for pattern in filter_patterns:
try:
@ -380,10 +374,36 @@ def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type
raise SystemExit(2)
return snapshots.filter(q_filter)
def search_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='search') -> QuerySet:
if not search_backend_enabled():
stderr()
stderr(
'[X] The search backend is not enabled, set config.USE_SEARCHING_BACKEND = True',
color='red',
)
raise SystemExit(2)
from core.models import Snapshot
qsearch = Snapshot.objects.none()
for pattern in filter_patterns:
try:
qsearch |= query_search_index(pattern)
except:
raise SystemExit(2)
return snapshots & qsearch
@enforce_types
def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
if filter_type != 'search':
return q_filter(snapshots, filter_patterns, filter_type)
else:
return search_filter(snapshots, filter_patterns, filter_type)
def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
"""indexed links without checking archive status or data directory validity"""
links = [snapshot.as_link() for snapshot in snapshots.iterator()]
links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
return {
link.link_dir: link
for link in links
@ -391,7 +411,7 @@ def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Option
def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
"""indexed links that are archived with a valid data directory"""
links = [snapshot.as_link() for snapshot in snapshots.iterator()]
links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
return {
link.link_dir: link
for link in filter(is_archived, links)
@ -399,7 +419,7 @@ def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optio
def get_unarchived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
"""indexed links that are unarchived with no data directory or an empty data directory"""
links = [snapshot.as_link() for snapshot in snapshots.iterator()]
links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
return {
link.link_dir: link
for link in filter(is_unarchived, links)
@ -424,7 +444,7 @@ def get_present_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Option
def get_valid_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
"""dirs with a valid index matched to the main index and archived content"""
links = [snapshot.as_link() for snapshot in snapshots.iterator()]
links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
return {
link.link_dir: link
for link in filter(is_valid, links)

View file

@ -1,12 +1,14 @@
__package__ = 'archivebox.index'
from string import Template
from datetime import datetime
from typing import List, Optional, Iterator, Mapping
from pathlib import Path
from django.utils.html import format_html
from collections import defaultdict
from .schema import Link
from ..system import atomic_write, copy_and_overwrite
from ..system import atomic_write
from ..logging_util import printable_filesize
from ..util import (
enforce_types,
@ -17,21 +19,15 @@ from ..util import (
)
from ..config import (
OUTPUT_DIR,
TEMPLATES_DIR,
VERSION,
GIT_SHA,
FOOTER_INFO,
ARCHIVE_DIR_NAME,
HTML_INDEX_FILENAME,
STATIC_DIR_NAME,
ROBOTS_TXT_FILENAME,
FAVICON_FILENAME,
)
MAIN_INDEX_TEMPLATE = str(Path(TEMPLATES_DIR) / 'main_index.html')
MINIMAL_INDEX_TEMPLATE = str(Path(TEMPLATES_DIR) / 'main_index_minimal.html')
MAIN_INDEX_ROW_TEMPLATE = str(Path(TEMPLATES_DIR) / 'main_index_row.html')
LINK_DETAILS_TEMPLATE = str(Path(TEMPLATES_DIR) / 'link_details.html')
MAIN_INDEX_TEMPLATE = 'main_index.html'
MINIMAL_INDEX_TEMPLATE = 'main_index_minimal.html'
LINK_DETAILS_TEMPLATE = 'link_details.html'
TITLE_LOADING_MSG = 'Not yet archived...'
@ -50,62 +46,25 @@ def parse_html_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[str]:
return ()
@enforce_types
def write_html_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, finished: bool=False) -> None:
"""write the html link index to a given path"""
copy_and_overwrite(str(Path(TEMPLATES_DIR) / FAVICON_FILENAME), str(out_dir / FAVICON_FILENAME))
copy_and_overwrite(str(Path(TEMPLATES_DIR) / ROBOTS_TXT_FILENAME), str(out_dir / ROBOTS_TXT_FILENAME))
copy_and_overwrite(str(Path(TEMPLATES_DIR) / STATIC_DIR_NAME), str(out_dir / STATIC_DIR_NAME))
rendered_html = main_index_template(links, finished=finished)
atomic_write(str(out_dir / HTML_INDEX_FILENAME), rendered_html)
def generate_index_from_links(links: List[Link], with_headers: bool):
if with_headers:
output = main_index_template(links)
else:
output = main_index_template(links, template=MINIMAL_INDEX_TEMPLATE)
return output
@enforce_types
def main_index_template(links: List[Link], finished: bool=True, template: str=MAIN_INDEX_TEMPLATE) -> str:
def main_index_template(links: List[Link], template: str=MAIN_INDEX_TEMPLATE) -> str:
"""render the template for the entire main index"""
return render_legacy_template(template, {
return render_django_template(template, {
'version': VERSION,
'git_sha': GIT_SHA,
'num_links': str(len(links)),
'status': 'finished' if finished else 'running',
'date_updated': datetime.now().strftime('%Y-%m-%d'),
'time_updated': datetime.now().strftime('%Y-%m-%d %H:%M'),
'rows': '\n'.join(
main_index_row_template(link)
for link in links
),
'footer_info': FOOTER_INFO,
})
@enforce_types
def main_index_row_template(link: Link) -> str:
"""render the template for an individual link row of the main index"""
from ..extractors.wget import wget_output_path
return render_legacy_template(MAIN_INDEX_ROW_TEMPLATE, {
**link._asdict(extended=True),
# before pages are finished archiving, show loading msg instead of title
'title': htmlencode(
link.title
or (link.base_url if link.is_archived else TITLE_LOADING_MSG)
),
# before pages are finished archiving, show fallback loading favicon
'favicon_url': (
str(Path(ARCHIVE_DIR_NAME) / link.timestamp / 'favicon.ico')
# if link['is_archived'] else ''
),
# before pages are finished archiving, show the details page instead
'wget_url': urlencode(wget_output_path(link) or 'index.html'),
# replace commas in tags with spaces, or file extension if it's static
'tags': (link.tags or '') + (' {}'.format(link.extension) if link.is_static else ''),
'links': [link._asdict(extended=True) for link in links],
'FOOTER_INFO': FOOTER_INFO,
})
@ -126,7 +85,7 @@ def link_details_template(link: Link) -> str:
link_info = link._asdict(extended=True)
return render_legacy_template(LINK_DETAILS_TEMPLATE, {
return render_django_template(LINK_DETAILS_TEMPLATE, {
**link_info,
**link_info['canonical'],
'title': htmlencode(
@ -146,12 +105,60 @@ def link_details_template(link: Link) -> str:
'oldest_archive_date': ts_to_date(link.oldest_archive_date),
})
@enforce_types
def render_legacy_template(template_path: str, context: Mapping[str, str]) -> str:
def render_django_template(template: str, context: Mapping[str, str]) -> str:
"""render a given html template string with the given template content"""
from django.template.loader import render_to_string
# will be replaced by django templates in the future
with open(template_path, 'r', encoding='utf-8') as template:
template_str = template.read()
return Template(template_str).substitute(**context)
return render_to_string(template, context)
def snapshot_icons(snapshot) -> str:
from core.models import EXTRACTORS
archive_results = snapshot.archiveresult_set.filter(status="succeeded")
link = snapshot.as_link()
path = link.archive_path
canon = link.canonical_outputs()
output = ""
output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{} </a>'
icons = {
"singlefile": "",
"wget": "🆆",
"dom": "🅷",
"pdf": "📄",
"screenshot": "💻",
"media": "📼",
"git": "🅶",
"archive_org": "🏛",
"readability": "🆁",
"mercury": "🅼",
"warc": "📦"
}
exclude = ["favicon", "title", "headers", "archive_org"]
# Missing specific entry for WARC
extractor_items = defaultdict(lambda: None)
for extractor, _ in EXTRACTORS:
for result in archive_results:
if result.extractor == extractor:
extractor_items[extractor] = result
for extractor, _ in EXTRACTORS:
if extractor not in exclude:
exists = extractor_items[extractor] is not None
output += output_template.format(path, canon[f"{extractor}_path"], str(exists),
extractor, icons.get(extractor, "?"))
if extractor == "wget":
# warc isn't technically it's own extractor, so we have to add it after wget
exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
output += output_template.format(exists[0] if exists else '#', canon["warc_path"], str(bool(exists)), "warc", icons.get("warc", "?"))
if extractor == "archive_org":
# The check for archive_org is different, so it has to be handled separately
target_path = Path(path) / "archive.org.txt"
exists = target_path.exists()
output += '<a href="{}" class="exists-{}" title="{}">{}</a> '.format(canon["archive_org_path"], str(exists),
"archive_org", icons.get("archive_org", "?"))
return format_html(f'<span class="files-icons" style="font-size: 1.1em; opacity: 0.8">{output}<span>')

View file

@ -8,7 +8,7 @@ from pathlib import Path
from datetime import datetime
from typing import List, Optional, Iterator, Any, Union
from .schema import Link, ArchiveResult
from .schema import Link
from ..system import atomic_write
from ..util import enforce_types
from ..config import (
@ -39,7 +39,20 @@ MAIN_INDEX_HEADER = {
},
}
### Main Links Index
@enforce_types
def generate_json_index_from_links(links: List[Link], with_headers: bool):
if with_headers:
output = {
**MAIN_INDEX_HEADER,
'num_links': len(links),
'updated': datetime.now(),
'last_run_cmd': sys.argv,
'links': links,
}
else:
output = links
return to_json(output, indent=4, sort_keys=True)
@enforce_types
def parse_json_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
@ -65,30 +78,6 @@ def parse_json_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
continue
return ()
@enforce_types
def write_json_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None:
"""write the json link index to a given path"""
assert isinstance(links, List), 'Links must be a list, not a generator.'
assert not links or isinstance(links[0].history, dict)
assert not links or isinstance(links[0].sources, list)
if links and links[0].history.get('title'):
assert isinstance(links[0].history['title'][0], ArchiveResult)
if links and links[0].sources:
assert isinstance(links[0].sources[0], str)
main_index_json = {
**MAIN_INDEX_HEADER,
'num_links': len(links),
'updated': datetime.now(),
'last_run_cmd': sys.argv,
'links': links,
}
atomic_write(str(Path(out_dir) / JSON_INDEX_FILENAME), main_index_json)
### Link Details Index
@enforce_types

View file

@ -1,3 +1,11 @@
"""
WARNING: THIS FILE IS ALL LEGACY CODE TO BE REMOVED.
DO NOT ADD ANY NEW FEATURES TO THIS FILE, NEW CODE GOES HERE: core/models.py
"""
__package__ = 'archivebox.index'
from pathlib import Path
@ -31,6 +39,7 @@ class ArchiveResult:
status: str
start_ts: datetime
end_ts: datetime
index_texts: Union[List[str], None] = None
schema: str = 'ArchiveResult'
def __post_init__(self):
@ -207,6 +216,10 @@ class Link:
})
return info
def as_snapshot(self):
from core.models import Snapshot
return Snapshot.objects.get(url=self.url)
@classmethod
def from_json(cls, json_info, guess=False):
from ..util import parse_date
@ -339,7 +352,7 @@ class Link:
### Archive Status Helpers
@property
def num_outputs(self) -> int:
return len(tuple(filter(None, self.latest_outputs().values())))
return self.as_snapshot().num_outputs
@property
def num_failures(self) -> int:

View file

@ -4,17 +4,17 @@ from io import StringIO
from pathlib import Path
from typing import List, Tuple, Iterator
from django.db.models import QuerySet
from django.db import transaction
from .schema import Link
from ..util import enforce_types
from ..config import setup_django, OUTPUT_DIR
from ..config import OUTPUT_DIR
### Main Links Index
@enforce_types
def parse_sql_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
setup_django(out_dir, check_db=True)
from core.models import Snapshot
return (
@ -24,9 +24,6 @@ def parse_sql_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
@enforce_types
def remove_from_sql_main_index(snapshots: QuerySet, out_dir: Path=OUTPUT_DIR) -> None:
setup_django(out_dir, check_db=True)
from django.db import transaction
with transaction.atomic():
snapshots.delete()
@ -51,9 +48,6 @@ def write_link_to_sql_index(link: Link):
@enforce_types
def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None:
setup_django(out_dir, check_db=True)
from django.db import transaction
with transaction.atomic():
for link in links:
write_link_to_sql_index(link)
@ -61,9 +55,7 @@ def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None:
@enforce_types
def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR) -> None:
setup_django(out_dir, check_db=True)
from core.models import Snapshot
from django.db import transaction
with transaction.atomic():
try:
@ -84,7 +76,6 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR) -> None:
@enforce_types
def list_migrations(out_dir: Path=OUTPUT_DIR) -> List[Tuple[bool, str]]:
setup_django(out_dir, check_db=False)
from django.core.management import call_command
out = StringIO()
call_command("showmigrations", list=True, stdout=out)
@ -101,7 +92,6 @@ def list_migrations(out_dir: Path=OUTPUT_DIR) -> List[Tuple[bool, str]]:
@enforce_types
def apply_migrations(out_dir: Path=OUTPUT_DIR) -> List[str]:
setup_django(out_dir, check_db=False)
from django.core.management import call_command
null, out = StringIO(), StringIO()
call_command("makemigrations", interactive=False, stdout=null)
@ -112,6 +102,5 @@ def apply_migrations(out_dir: Path=OUTPUT_DIR) -> List[str]:
@enforce_types
def get_admins(out_dir: Path=OUTPUT_DIR) -> List[str]:
setup_django(out_dir, check_db=False)
from django.contrib.auth.models import User
return User.objects.filter(is_superuser=True)

View file

@ -19,6 +19,7 @@ if TYPE_CHECKING:
from .util import enforce_types
from .config import (
ConfigDict,
OUTPUT_DIR,
PYTHON_ENCODING,
ANSI,
IS_TTY,
@ -443,7 +444,7 @@ def log_shell_welcome_msg():
from .cli import list_subcommands
print('{green}# ArchiveBox Imports{reset}'.format(**ANSI))
print('{green}from archivebox.core.models import Snapshot, User{reset}'.format(**ANSI))
print('{green}from core.models import Snapshot, User{reset}'.format(**ANSI))
print('{green}from archivebox import *\n {}{reset}'.format("\n ".join(list_subcommands().keys()), **ANSI))
print()
print('[i] Welcome to the ArchiveBox Shell!')
@ -477,39 +478,7 @@ def printable_filesize(num_bytes: Union[int, float]) -> str:
@enforce_types
def printable_folders(folders: Dict[str, Optional["Link"]],
json: bool=False,
html: bool=False,
csv: Optional[str]=None,
with_headers: bool=False) -> str:
from .index.json import MAIN_INDEX_HEADER
links = folders.values()
if json:
from .index.json import to_json
if with_headers:
output = {
**MAIN_INDEX_HEADER,
'num_links': len(links),
'updated': datetime.now(),
'last_run_cmd': sys.argv,
'links': links,
}
else:
output = links
return to_json(output, indent=4, sort_keys=True)
elif html:
from .index.html import main_index_template
if with_headers:
output = main_index_template(links, True)
else:
from .index.html import MINIMAL_INDEX_TEMPLATE
output = main_index_template(links, True, MINIMAL_INDEX_TEMPLATE)
return output
elif csv:
from .index.csv import links_to_csv
return links_to_csv(folders.values(), cols=csv.split(','), header=with_headers)
return '\n'.join(
f'{folder} {link and link.url} "{link and link.title}"'
for folder, link in folders.items()
@ -546,19 +515,24 @@ def printable_folder_status(name: str, folder: Dict) -> str:
else:
num_files = 'missing'
if ' ' in str(folder['path']):
folder['path'] = f'"{folder["path"]}"'
path = str(folder['path']).replace(str(OUTPUT_DIR), '.') if folder['path'] else ''
if path and ' ' in path:
path = f'"{path}"'
# if path is just a plain dot, replace it back with the full path for clarity
if path == '.':
path = str(OUTPUT_DIR)
return ' '.join((
ANSI[color],
symbol,
ANSI['reset'],
name.ljust(22),
(str(folder["path"]) or '').ljust(76),
name.ljust(21),
num_files.ljust(14),
ANSI[color],
note,
note.ljust(8),
ANSI['reset'],
path.ljust(76),
))
@ -578,17 +552,18 @@ def printable_dependency_version(name: str, dependency: Dict) -> str:
else:
color, symbol, note, version = 'lightyellow', '-', 'disabled', '-'
if ' ' in (dependency["path"] or ''):
dependency["path"] = f'"{dependency["path"]}"'
path = str(dependency["path"]).replace(str(OUTPUT_DIR), '.') if dependency["path"] else ''
if path and ' ' in path:
path = f'"{path}"'
return ' '.join((
ANSI[color],
symbol,
ANSI['reset'],
name.ljust(22),
(dependency["path"] or '').ljust(76),
name.ljust(21),
version.ljust(14),
ANSI[color],
note,
note.ljust(8),
ANSI['reset'],
path.ljust(76),
))

View file

@ -29,7 +29,6 @@ from .util import enforce_types # type: ignore
from .system import get_dir_size, dedupe_cron_jobs, CRON_COMMENT
from .index import (
load_main_index,
get_empty_snapshot_queryset,
parse_links_from_source,
dedupe_links,
write_main_index,
@ -45,16 +44,22 @@ from .index import (
get_corrupted_folders,
get_unrecognized_folders,
fix_invalid_folder_locations,
write_link_details,
)
from .index.json import (
parse_json_main_index,
parse_json_links_details,
generate_json_index_from_links,
)
from .index.sql import (
get_admins,
apply_migrations,
remove_from_sql_main_index,
)
from .index.html import (
generate_index_from_links,
)
from .index.csv import links_to_csv
from .extractors import archive_links, archive_link, ignore_methods
from .config import (
stderr,
@ -83,7 +88,6 @@ from .config import (
check_dependencies,
check_data_folder,
write_config_file,
setup_django,
VERSION,
CODE_LOCATIONS,
EXTERNAL_LOCATIONS,
@ -110,6 +114,7 @@ from .logging_util import (
printable_dependency_version,
)
from .search import flush_search_index, index_links
ALLOWED_IN_OUTPUT_DIR = {
'lost+found',
@ -212,7 +217,7 @@ def version(quiet: bool=False,
else:
print('ArchiveBox v{}'.format(VERSION))
p = platform.uname()
print(p.system, platform.platform(), p.machine)
print(sys.implementation.name.title(), p.system, platform.platform(), p.machine, '(in Docker)' if IN_DOCKER else '(not in Docker)')
print()
print('{white}[i] Dependency versions:{reset}'.format(**ANSI))
@ -259,6 +264,7 @@ def run(subcommand: str,
@enforce_types
def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
"""Initialize a new ArchiveBox collection in the current directory"""
from core.models import Snapshot
Path(out_dir).mkdir(exist_ok=True)
is_empty = not len(set(os.listdir(out_dir)) - ALLOWED_IN_OUTPUT_DIR)
@ -312,7 +318,6 @@ def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
else:
print('\n{green}[+] Building main SQL index and running migrations...{reset}'.format(**ANSI))
setup_django(out_dir, check_db=False)
DATABASE_FILE = Path(out_dir) / SQL_INDEX_FILENAME
print(f'{DATABASE_FILE}')
print()
@ -330,7 +335,7 @@ def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
print()
print('{green}[*] Collecting links from any existing indexes and archive folders...{reset}'.format(**ANSI))
all_links = get_empty_snapshot_queryset()
all_links = Snapshot.objects.none()
pending_links: Dict[str, Link] = {}
if existing_index:
@ -378,7 +383,7 @@ def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
print(' archivebox list --status=invalid')
write_main_index(list(pending_links.values()), out_dir=out_dir, finished=True)
write_main_index(list(pending_links.values()), out_dir=out_dir)
print('\n{green}------------------------------------------------------------------{reset}'.format(**ANSI))
if existing_index:
@ -506,7 +511,7 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
@enforce_types
def oneshot(url: str, out_dir: Path=OUTPUT_DIR):
def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
"""
Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
You can run this to archive single pages without needing to create a whole collection with archivebox init.
@ -518,8 +523,9 @@ def oneshot(url: str, out_dir: Path=OUTPUT_DIR):
color='red'
)
raise SystemExit(2)
methods = ignore_methods(['title'])
archive_link(oneshot_link[0], out_dir=out_dir, methods=methods, skip_index=True)
methods = extractors.split(",") if extractors else ignore_methods(['title'])
archive_link(oneshot_link[0], out_dir=out_dir, methods=methods)
return oneshot_link
@enforce_types
@ -529,8 +535,8 @@ def add(urls: Union[str, List[str]],
index_only: bool=False,
overwrite: bool=False,
init: bool=False,
out_dir: Path=OUTPUT_DIR,
extractors: str="") -> List[Link]:
extractors: str="",
out_dir: Path=OUTPUT_DIR) -> List[Link]:
"""Add a new URL or list of URLs to your archive"""
assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
@ -567,7 +573,7 @@ def add(urls: Union[str, List[str]],
imported_links = list({link.url: link for link in (new_links + new_links_depth)}.values())
new_links = dedupe_links(all_links, imported_links)
write_main_index(links=new_links, out_dir=out_dir, finished=not new_links)
write_main_index(links=new_links, out_dir=out_dir)
all_links = load_main_index(out_dir=out_dir)
if index_only:
@ -585,7 +591,7 @@ def add(urls: Union[str, List[str]],
archive_links(imported_links, overwrite=True, **archive_kwargs)
elif new_links:
archive_links(new_links, overwrite=False, **archive_kwargs)
return all_links
@enforce_types
@ -660,6 +666,7 @@ def remove(filter_str: Optional[str]=None,
to_remove = snapshots.count()
flush_search_index(snapshots=snapshots)
remove_from_sql_main_index(snapshots=snapshots, out_dir=out_dir)
all_snapshots = load_main_index(out_dir=out_dir)
log_removal_finished(all_snapshots.count(), to_remove)
@ -677,6 +684,7 @@ def update(resume: Optional[float]=None,
status: Optional[str]=None,
after: Optional[str]=None,
before: Optional[str]=None,
extractors: str="",
out_dir: Path=OUTPUT_DIR) -> List[Link]:
"""Import any new links from subscriptions and retry any previously failed/skipped links"""
@ -684,6 +692,8 @@ def update(resume: Optional[float]=None,
check_dependencies()
new_links: List[Link] = [] # TODO: Remove input argument: only_new
extractors = extractors.split(",") if extractors else []
# Step 1: Filter for selected_links
matching_snapshots = list_links(
filter_patterns=filter_patterns,
@ -700,6 +710,9 @@ def update(resume: Optional[float]=None,
all_links = [link for link in matching_folders.values() if link]
if index_only:
for link in all_links:
write_link_details(link, out_dir=out_dir, skip_sql_index=True)
index_links(all_links, out_dir=out_dir)
return all_links
# Step 2: Run the archive methods for each link
@ -714,7 +727,13 @@ def update(resume: Optional[float]=None,
stderr(f'[√] Nothing found to resume after {resume}', color='green')
return all_links
archive_links(to_archive, overwrite=overwrite, out_dir=out_dir)
archive_kwargs = {
"out_dir": out_dir,
}
if extractors:
archive_kwargs["methods"] = extractors
archive_links(to_archive, overwrite=overwrite, **archive_kwargs)
# Step 4: Re-write links index with updated titles, icons, and resources
all_links = load_main_index(out_dir=out_dir)
@ -747,7 +766,6 @@ def list_all(filter_patterns_str: Optional[str]=None,
elif filter_patterns_str:
filter_patterns = filter_patterns_str.split('\n')
snapshots = list_links(
filter_patterns=filter_patterns,
filter_type=filter_type,
@ -763,8 +781,16 @@ def list_all(filter_patterns_str: Optional[str]=None,
status=status,
out_dir=out_dir,
)
print(printable_folders(folders, json=json, csv=csv, html=html, with_headers=with_headers))
if json:
output = generate_json_index_from_links(folders.values(), with_headers)
elif html:
output = generate_index_from_links(folders.values(), with_headers)
elif csv:
output = links_to_csv(folders.values(), cols=csv.split(','), header=with_headers)
else:
output = printable_folders(folders, with_headers=with_headers)
print(output)
return folders
@ -1048,7 +1074,6 @@ def server(runserver_args: Optional[List[str]]=None,
config.DEBUG = config.DEBUG or debug
check_data_folder(out_dir=out_dir)
setup_django(out_dir)
from django.core.management import call_command
from django.contrib.auth.models import User
@ -1085,7 +1110,6 @@ def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None:
"""Run an ArchiveBox Django management command"""
check_data_folder(out_dir=out_dir)
setup_django(out_dir)
from django.core.management import execute_from_command_line
if (args and "createsuperuser" in args) and (IN_DOCKER and not IS_TTY):
@ -1102,7 +1126,6 @@ def shell(out_dir: Path=OUTPUT_DIR) -> None:
check_data_folder(out_dir=out_dir)
setup_django(OUTPUT_DIR)
from django.core.management import call_command
call_command("shell_plus")

View file

@ -8,10 +8,9 @@ if __name__ == '__main__':
# (e.g. makemigrations), you can comment out this check temporarily
if not ('makemigrations' in sys.argv or 'migrate' in sys.argv):
print("[X] Don't run ./manage.py directly, use the archivebox CLI instead e.g.:")
print(' archivebox manage createsuperuser')
print("[X] Don't run ./manage.py directly (unless you are a developer running makemigrations):")
print()
print(' Hint: Use these archivebox commands instead of the ./manage.py equivalents:')
print(' Hint: Use these archivebox CLI commands instead of the ./manage.py equivalents:')
print(' archivebox init (migrates the databse to latest version)')
print(' archivebox server (runs the Django web server)')
print(' archivebox shell (opens an iPython Django shell with all models imported)')

View file

@ -32,6 +32,7 @@ from ..index.schema import Link
from ..logging_util import TimedProgress, log_source_saved
from .pocket_html import parse_pocket_html_export
from .pocket_api import parse_pocket_api_export
from .pinboard_rss import parse_pinboard_rss_export
from .wallabag_atom import parse_wallabag_atom_export
from .shaarli_rss import parse_shaarli_rss_export
@ -44,6 +45,7 @@ from .generic_txt import parse_generic_txt_export
PARSERS = (
# Specialized parsers
('Pocket API', parse_pocket_api_export),
('Wallabag ATOM', parse_wallabag_atom_export),
('Pocket HTML', parse_pocket_html_export),
('Pinboard RSS', parse_pinboard_rss_export),

View file

@ -0,0 +1,113 @@
__package__ = 'archivebox.parsers'
import re
from typing import IO, Iterable, Optional
from configparser import ConfigParser
from pathlib import Path
from ..vendor.pocket import Pocket
from ..index.schema import Link
from ..util import enforce_types
from ..system import atomic_write
from ..config import (
SOURCES_DIR,
POCKET_CONSUMER_KEY,
POCKET_ACCESS_TOKENS,
)
COUNT_PER_PAGE = 500
API_DB_PATH = Path(SOURCES_DIR) / 'pocket_api.db'
# search for broken protocols that sometimes come from the Pocket API
_BROKEN_PROTOCOL_RE = re.compile('^(http[s]?)(:/(?!/))')
def get_pocket_articles(api: Pocket, since=None, page=0):
body, headers = api.get(
state='archive',
sort='oldest',
since=since,
count=COUNT_PER_PAGE,
offset=page * COUNT_PER_PAGE,
)
articles = body['list'].values() if isinstance(body['list'], dict) else body['list']
returned_count = len(articles)
yield from articles
if returned_count == COUNT_PER_PAGE:
yield from get_pocket_articles(api, since=since, page=page + 1)
else:
api.last_since = body['since']
def link_from_article(article: dict, sources: list):
url: str = article['resolved_url'] or article['given_url']
broken_protocol = _BROKEN_PROTOCOL_RE.match(url)
if broken_protocol:
url = url.replace(f'{broken_protocol.group(1)}:/', f'{broken_protocol.group(1)}://')
title = article['resolved_title'] or article['given_title'] or url
return Link(
url=url,
timestamp=article['time_read'],
title=title,
tags=article.get('tags'),
sources=sources
)
def write_since(username: str, since: str):
if not API_DB_PATH.exists():
atomic_write(API_DB_PATH, '')
since_file = ConfigParser()
since_file.optionxform = str
since_file.read(API_DB_PATH)
since_file[username] = {
'since': since
}
with open(API_DB_PATH, 'w+') as new:
since_file.write(new)
def read_since(username: str) -> Optional[str]:
if not API_DB_PATH.exists():
atomic_write(API_DB_PATH, '')
config_file = ConfigParser()
config_file.optionxform = str
config_file.read(API_DB_PATH)
return config_file.get(username, 'since', fallback=None)
@enforce_types
def should_parse_as_pocket_api(text: str) -> bool:
return text.startswith('pocket://')
@enforce_types
def parse_pocket_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
"""Parse bookmarks from the Pocket API"""
input_buffer.seek(0)
pattern = re.compile(r"^pocket:\/\/(\w+)")
for line in input_buffer:
if should_parse_as_pocket_api(line):
username = pattern.search(line).group(1)
api = Pocket(POCKET_CONSUMER_KEY, POCKET_ACCESS_TOKENS[username])
api.last_since = None
for article in get_pocket_articles(api, since=read_since(username)):
yield link_from_article(article, sources=[line])
write_since(username, api.last_since)

View file

@ -0,0 +1,108 @@
from typing import List, Union
from pathlib import Path
from importlib import import_module
from django.db.models import QuerySet
from archivebox.index.schema import Link
from archivebox.util import enforce_types
from archivebox.config import stderr, OUTPUT_DIR, USE_INDEXING_BACKEND, USE_SEARCHING_BACKEND, SEARCH_BACKEND_ENGINE
from .utils import get_indexable_content, log_index_started
def indexing_enabled():
return USE_INDEXING_BACKEND
def search_backend_enabled():
return USE_SEARCHING_BACKEND
def get_backend():
return f'search.backends.{SEARCH_BACKEND_ENGINE}'
def import_backend():
backend_string = get_backend()
try:
backend = import_module(backend_string)
except Exception as err:
raise Exception("Could not load '%s' as a backend: %s" % (backend_string, err))
return backend
@enforce_types
def write_search_index(link: Link, texts: Union[List[str], None]=None, out_dir: Path=OUTPUT_DIR, skip_text_index: bool=False) -> None:
if not indexing_enabled():
return
if not skip_text_index and texts:
from core.models import Snapshot
snap = Snapshot.objects.filter(url=link.url).first()
backend = import_backend()
if snap:
try:
backend.index(snapshot_id=str(snap.id), texts=texts)
except Exception as err:
stderr()
stderr(
f'[X] The search backend threw an exception={err}:',
color='red',
)
@enforce_types
def query_search_index(query: str, out_dir: Path=OUTPUT_DIR) -> QuerySet:
from core.models import Snapshot
if search_backend_enabled():
backend = import_backend()
try:
snapshot_ids = backend.search(query)
except Exception as err:
stderr()
stderr(
f'[X] The search backend threw an exception={err}:',
color='red',
)
raise
else:
# TODO preserve ordering from backend
qsearch = Snapshot.objects.filter(pk__in=snapshot_ids)
return qsearch
return Snapshot.objects.none()
@enforce_types
def flush_search_index(snapshots: QuerySet):
if not indexing_enabled() or not snapshots:
return
backend = import_backend()
snapshot_ids=(str(pk) for pk in snapshots.values_list('pk',flat=True))
try:
backend.flush(snapshot_ids)
except Exception as err:
stderr()
stderr(
f'[X] The search backend threw an exception={err}:',
color='red',
)
@enforce_types
def index_links(links: Union[List[Link],None], out_dir: Path=OUTPUT_DIR):
if not links:
return
from core.models import Snapshot, ArchiveResult
for link in links:
snap = Snapshot.objects.filter(url=link.url).first()
if snap:
results = ArchiveResult.objects.indexable().filter(snapshot=snap)
log_index_started(link.url)
try:
texts = get_indexable_content(results)
except Exception as err:
stderr()
stderr(
f'[X] An Exception ocurred reading the indexable content={err}:',
color='red',
)
else:
write_search_index(link, texts, out_dir=out_dir)

View file

View file

@ -0,0 +1,45 @@
import re
from subprocess import run, PIPE
from typing import List, Generator
from archivebox.config import ARCHIVE_DIR, RIPGREP_VERSION
from archivebox.util import enforce_types
RG_IGNORE_EXTENSIONS = ('css','js','orig','svg')
RG_ADD_TYPE = '--type-add'
RG_IGNORE_ARGUMENTS = f"ignore:*.{{{','.join(RG_IGNORE_EXTENSIONS)}}}"
RG_DEFAULT_ARGUMENTS = "-ilTignore" # Case insensitive(i), matching files results(l)
RG_REGEX_ARGUMENT = '-e'
TIMESTAMP_REGEX = r'\/([\d]+\.[\d]+)\/'
ts_regex = re.compile(TIMESTAMP_REGEX)
@enforce_types
def index(snapshot_id: str, texts: List[str]):
return
@enforce_types
def flush(snapshot_ids: Generator[str, None, None]):
return
@enforce_types
def search(text: str) -> List[str]:
if not RIPGREP_VERSION:
raise Exception("ripgrep binary not found, install ripgrep to use this search backend")
from core.models import Snapshot
rg_cmd = ['rg', RG_ADD_TYPE, RG_IGNORE_ARGUMENTS, RG_DEFAULT_ARGUMENTS, RG_REGEX_ARGUMENT, text, str(ARCHIVE_DIR)]
rg = run(rg_cmd, stdout=PIPE, stderr=PIPE, timeout=60)
file_paths = [p.decode() for p in rg.stdout.splitlines()]
timestamps = set()
for path in file_paths:
ts = ts_regex.findall(path)
if ts:
timestamps.add(ts[0])
snap_ids = [str(id) for id in Snapshot.objects.filter(timestamp__in=timestamps).values_list('pk', flat=True)]
return snap_ids

View file

@ -0,0 +1,28 @@
from typing import List, Generator
from sonic import IngestClient, SearchClient
from archivebox.util import enforce_types
from archivebox.config import SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD, SONIC_BUCKET, SONIC_COLLECTION
MAX_SONIC_TEXT_LENGTH = 20000
@enforce_types
def index(snapshot_id: str, texts: List[str]):
with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
for text in texts:
chunks = [text[i:i+MAX_SONIC_TEXT_LENGTH] for i in range(0, len(text), MAX_SONIC_TEXT_LENGTH)]
for chunk in chunks:
ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(chunk))
@enforce_types
def search(text: str) -> List[str]:
with SearchClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as querycl:
snap_ids = querycl.query(SONIC_COLLECTION, SONIC_BUCKET, text)
return snap_ids
@enforce_types
def flush(snapshot_ids: Generator[str, None, None]):
with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
for id in snapshot_ids:
ingestcl.flush_object(SONIC_COLLECTION, SONIC_BUCKET, str(id))

View file

@ -0,0 +1,44 @@
from django.db.models import QuerySet
from archivebox.util import enforce_types
from archivebox.config import ANSI
def log_index_started(url):
print('{green}[*] Indexing url: {} in the search index {reset}'.format(url, **ANSI))
print( )
def get_file_result_content(res, extra_path, use_pwd=False):
if use_pwd:
fpath = f'{res.pwd}/{res.output}'
else:
fpath = f'{res.output}'
if extra_path:
fpath = f'{fpath}/{extra_path}'
with open(fpath, 'r') as file:
data = file.read()
if data:
return [data]
return []
# This should be abstracted by a plugin interface for extractors
@enforce_types
def get_indexable_content(results: QuerySet):
if not results:
return []
# Only use the first method available
res, method = results.first(), results.first().extractor
if method not in ('readability', 'singlefile', 'dom', 'wget'):
return []
# This should come from a plugin interface
if method == 'readability':
return get_file_result_content(res, 'content.txt')
elif method == 'singlefile':
return get_file_result_content(res, '')
elif method == 'dom':
return get_file_result_content(res,'',use_pwd=True)
elif method == 'wget':
return get_file_result_content(res,'',use_pwd=True)

View file

@ -107,6 +107,9 @@
<a href="{% url 'admin:password_change' %}">{% trans 'Change password' %}</a> /
{% endif %}
<a href="{% url 'admin:logout' %}">{% trans 'Log out' %}</a>
|
<a> <span id="snapshotListView" style="cursor: pointer"></span> </a>
<a> <span id="snapshotGridView"style="letter-spacing: -.4em; cursor: pointer;">⣿⣿</span></a>
{% endblock %}
</div>
{% endif %}
@ -179,8 +182,63 @@
});
}
};
function redirectWithQuery(uri){
uri_query = uri + document.location.search;
window.location = uri_query;
};
function selectSnapshotListView(){
localStorage.setItem('currentSnapshotView', 'List');
redirectWithQuery("{% url 'admin:core_snapshot_changelist' %}");
};
function selectSnapshotGridView(){
localStorage.setItem('currentSnapshotView', 'Grid');
redirectWithQuery("{% url 'admin:grid' %}");
};
function setPreferredSnapshotView(view){
urlPath = window.location.pathname;
if((view==="Grid") && urlPath == "{% url 'admin:core_snapshot_changelist' %}"){
selectSnapshotGridView();
}
{% comment %}
else if((view==="List") && urlPath == "{% url 'admin:grid' %}"){
selectSnapshotListView();
}
{% endcomment %}
};
function setupSnapshotViews() {
const preferredSnapshotView = localStorage.getItem('currentSnapshotView');
setPreferredSnapshotView(preferredSnapshotView);
$( document ).ready(function() {
$("#snapshotListView").click(function() {
selectSnapshotListView();
});
$("#snapshotGridView").click(function() {
selectSnapshotGridView();
});
$('input:checkbox').change(function(){
if($(this).is(':checked'))
$(this).parent().parent().parent().parent().addClass('selected-card');
else
$(this).parent().parent().parent().parent().removeClass('selected-card')
});
});
};
$(function () {
fix_actions();
setupSnapshotViews();
});
})(django.jQuery);
</script>

View file

@ -0,0 +1,91 @@
{% extends "admin/base_site.html" %}
{% load i18n admin_urls static admin_list %}
{% load core_tags %}
{% block extrastyle %}
{{ block.super }}
<link rel="stylesheet" type="text/css" href="{% static "admin/css/changelists.css" %}">
{% if cl.formset %}
<link rel="stylesheet" type="text/css" href="{% static "admin/css/forms.css" %}">
{% endif %}
{% if cl.formset or action_form %}
<script src="{% url 'admin:jsi18n' %}"></script>
{% endif %}
{{ media.css }}
{% if not actions_on_top and not actions_on_bottom %}
<style>
#changelist table thead th:first-child {width: inherit}
</style>
{% endif %}
{% endblock %}
{% block extrahead %}
{{ block.super }}
{{ media.js }}
{% endblock %}
{% block bodyclass %}{{ block.super }} app-{{ opts.app_label }} model-{{ opts.model_name }} change-list{% endblock %}
{% if not is_popup %}
{% block breadcrumbs %}
<div class="breadcrumbs">
<a href="{% url 'admin:index' %}">{% translate 'Home' %}</a>
&rsaquo; <a href="{% url 'admin:app_list' app_label=cl.opts.app_label %}">{{ cl.opts.app_config.verbose_name }}</a>
&rsaquo; {{ cl.opts.verbose_name_plural|capfirst }}
</div>
{% endblock %}
{% endif %}
{% block coltype %}{% endblock %}
{% block content %}
<div id="content-main">
{% block object-tools %}
<ul class="object-tools">
{% block object-tools-items %}
{% change_list_object_tools %}
{% endblock %}
</ul>
{% endblock %}
{% if cl.formset and cl.formset.errors %}
<p class="errornote">
{% if cl.formset.total_error_count == 1 %}{% translate "Please correct the error below." %}{% else %}{% translate "Please correct the errors below." %}{% endif %}
</p>
{{ cl.formset.non_form_errors }}
{% endif %}
<div class="module{% if cl.has_filters %} filtered{% endif %}" id="changelist">
<div class="changelist-form-container">
{% block search %}{% search_form cl %}{% endblock %}
{% block date_hierarchy %}{% if cl.date_hierarchy %}{% date_hierarchy cl %}{% endif %}{% endblock %}
<form id="changelist-form" method="post"{% if cl.formset and cl.formset.is_multipart %} enctype="multipart/form-data"{% endif %} novalidate>{% csrf_token %}
{% if cl.formset %}
<div>{{ cl.formset.management_form }}</div>
{% endif %}
{% block result_list %}
{% if action_form and actions_on_top and cl.show_admin_actions %}{% admin_actions %}{% endif %}
{% comment %}
Table grid
{% result_list cl %}
{% endcomment %}
{% snapshots_grid cl %}
{% if action_form and actions_on_bottom and cl.show_admin_actions %}{% admin_actions %}{% endif %}
{% endblock %}
{% block pagination %}{% pagination cl %}{% endblock %}
</form>
</div>
{% block filters %}
{% if cl.has_filters %}
<div id="changelist-filter">
<h2>{% translate 'Filter' %}</h2>
{% if cl.has_active_filters %}<h3 id="changelist-filter-clear">
<a href="{{ cl.clear_all_filters_qs }}">&#10006; {% translate "Clear all filters" %}</a>
</h3>{% endif %}
{% for spec in cl.filter_specs %}{% admin_list_filter cl spec %}{% endfor %}
</div>
{% endif %}
{% endblock %}
</div>
</div>
{% endblock %}

View file

@ -0,0 +1,162 @@
{% load i18n admin_urls static admin_list %}
{% load core_tags %}
{% block extrastyle %}
<style>
* {
-webkit-box-sizing: border-box;
-moz-box-sizing: border-box;
box-sizing: border-box;
}
a {
text-decoration: none;
color: orange;
}
h2 {
color: #000;
margin: 2rem 0 .5rem;
font-size: 1.25rem;
font-weight: 400;
{% comment %} text-transform: uppercase; {% endcomment %}
}
card.img {
display: block;
border: 0;
width: 100%;
height: auto;
}
/*************************** Cards *******************************/
.cards {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); /* see notes below */
grid-auto-rows: minmax(200px, auto);
grid-gap: 1rem;
}
.card {
/*height: 200px;*/
/*background: red;*/
border: 2px solid #e7e7e7;
border-radius: 4px;
-webkit-box-shadow: 0 2px 2px rgba(0, 0, 0, 0.15);
box-shadow: 0 2px 2px rgba(0, 0, 0, 0.15);
display: flex;
/* -webkit-box-orient: vertical; */
/* -webkit-box-direction: normal; */
-ms-flex-direction: column;
flex-direction: column;
position: relative;
color: #5d5e5e;
} /* li item */
.thumbnail img {
height: 100%;
box-sizing: border-box;
max-width: 100%;
max-height: 100%;
width: 100%;
}
.card-content {
font-size: .75rem;
padding: .5rem;
display: flex;
-webkit-box-orient: vertical;
-webkit-box-direction: normal;
-ms-flex-direction: column;
flex-direction: column;
-webkit-box-flex: 1;
-ms-flex: 1;
flex: 1;
}
.card-content h4{
vertical-align:bottom;
margin: 1.2em 0 0em 0;
}
.category {
font-size: .75rem;
text-transform: uppercase;
}
.category {
position: absolute;
top: 5%;
right: 0;
color: #fff;
background: #e74c3c;
padding: 10px 15px;
font-size: 10px;
font-weight: 600;
text-transform: uppercase;
}
.category__01 {
background-color: #50c6db;
}
.tags{
opacity: 0.8;
}
footer {
border-top: 2px solid #e7e7e7;
{% comment %} margin: .5rem 0 0; {% endcomment %}
{% comment %} min-height: 30px; {% endcomment %}
font-size: .5rem;
}
.post-meta {
padding: .3rem;
}
.comments {
margin-left: .5rem;
}
.selected-card{
border: 5px solid #ffaa31;
}
</style>
{% endblock %}
{% block content %}
<section class="cards">
{% for obj in results %}
<article class="card">
<picture class="thumbnail">
<a href="/{{obj.archive_path}}/index.html">
<img class="category__01" src="{% snapshot_image obj%}" alt="" />
</a>
</picture>
<div class="card-content">
{% if obj.tags_str %}
<p class="category category__01 tags">{{obj.tags_str}}</p>
{% endif %}
{% if obj.title %}
<a href="{% url 'admin:core_snapshot_change' obj.id %}">
<h4>{{obj.title|truncatechars:55 }}</h4>
</a>
{% endif %}
{% comment %} <p> TEXT If needed.</p> {% endcomment %}
</div><!-- .card-content -->
<footer>
<div class="post-meta">
<span style="float:right;"><input type="checkbox" name="_selected_action" value="{{obj.pk}}" class="action-select"></span>
<span class="timestamp">&#128337 {{obj.added}}</span>
<span class="comments">📖{{obj.num_outputs}}</span>
<span>🗄️{{ obj.archive_size | file_size }}</span>
</div>
</footer>
</article>
{% endfor %}
</section>
{% endblock %}

View file

@ -187,13 +187,6 @@
display: none;
}
body[data-status~=finished] .files-spinner {
display: none;
}
/*body[data-status~=running] .in-progress {
display: inline-block;
}*/
tr td a.favicon img {
padding-left: 6px;
padding-right: 12px;
@ -224,12 +217,10 @@
color: black;
}
tr td a.exists-True {
opacity: 1;
}
tr td a.exists-False {
opacity: 0.1;
filter: grayscale(100%);
.exists-False {
opacity: 0.1;
filter: grayscale(100%);
pointer-events: none;
}
</style>
<link rel="stylesheet" href="{% static 'bootstrap.min.css' %}">
@ -280,10 +271,9 @@
<br />
<center>
<small>
Archive created using <a href="https://github.com/ArchiveBox/ArchiveBox" title="Github">ArchiveBox</a> &nbsp; |
&nbsp;
Download index as <a href="index.json" title="JSON summary of archived links.">JSON</a>
<br /><br />
Archive created using <a href="https://github.com/ArchiveBox/ArchiveBox" title="Github">ArchiveBox</a> version
<a href="https://github.com/ArchiveBox/ArchiveBox/releases" title="Releases">v{{VERSION}}</a>.
<br/><br/>
{{FOOTER_INFO}}
</small>
</center>

View file

@ -2,44 +2,25 @@
{% load static %}
{% block body %}
<br>
<form action="{% url 'public-index' %}" method="get">
<input name="q" type="text" placeholder="Search...">
<button type="submit">Search</button>
<button onclick="location.href='{% url 'public-index' %}'" type="button">
Reload Index</button>
</form>
<table id="table-bookmarks">
<thead>
<tr>
<th style="width: 100px;">Bookmarked</th>
<th style="width: 26vw;">Saved Link ({{num_links}})</th>
<th style="width: 140px">Files</th>
<th style="width: 16vw;whitespace:nowrap;overflow-x:hidden;">Original URL</th>
</tr>
</thead>
<br>
<form action="{% url 'public-index' %}" method="get">
<input name="q" type="text" placeholder="Search...">
<button type="submit">Search</button>
<button onclick="location.href='{% url 'public-index' %}'" type="button">
Reload Index</button>
</form>
<table id="table-bookmarks">
<thead>
<tr>
<th style="width: 100px;">Bookmarked</th>
<th style="width: 26vw;">Snapshot ({{object_list|length}})</th>
<th style="width: 140px">Files</th>
<th style="width: 16vw;whitespace:nowrap;overflow-x:hidden;">Original URL</th>
</tr>
</thead>
<tbody>
{% for link in object_list %}
<tr>
<td title="{{link.timestamp}}">{{link.added}}</td>
<td class="title-col">
{% if link.is_archived %}
<a href="archive/{{link.timestamp}}/index.html"><img src="archive/{{link.timestamp}}/favicon.ico" class="link-favicon" decoding="async"></a>
{% else %}
<a href="archive/{{link.timestamp}}/index.html"><img src="{% static 'spinner.gif' %}" class="link-favicon" decoding="async"></a>
{% endif %}
<a href="archive/{{link.timestamp}}/index.html" title="{{link.title}}">
<span data-title-for="{{link.url}}" data-archived="{{link.is_archived}}">{{link.title|default:'Loading...'}}</span>
<small style="float:right">{{link.tags_str}}</small>
</a>
</td>
<td>
<a href="archive/{{link.timestamp}}/index.html">📄
<span data-number-for="{{link.url}}" title="Fetching any missing files...">{{link.icons}} <img src="{% static 'spinner.gif' %}" class="files-spinner" decoding="async"/></span>
</a>
</td>
<td style="text-align:left"><a href="{{link.url}}">{{link.url}}</a></td>
</tr>
{% include 'main_index_row.html' with link=link %}
{% endfor %}
</tbody>
</table>
@ -59,6 +40,12 @@
<a href="{% url 'public-index' %}?page={{ page_obj.paginator.num_pages }}">last &raquo;</a>
{% endif %}
</span>
<br>
{% if page_obj.has_next %}
<a href="{% url 'public-index' %}?page={{ page_obj.next_page_number }}">next </a>
<a href="{% url 'public-index' %}?page={{ page_obj.paginator.num_pages }}">last &raquo;</a>
{% endif %}
</span>
<br>
</center>
{% endblock %}
{% endblock %}

View file

@ -1,7 +1,7 @@
<!DOCTYPE html>
<html lang="en">
<head>
<title>$title</title>
<title>{{title}}</title>
<meta charset="utf-8" name="viewport" content="width=device-width, initial-scale=1">
<style>
html, body {
@ -249,13 +249,13 @@
<div class="col-lg-8">
<img src="favicon.ico" alt="Favicon">
&nbsp;&nbsp;
$title
{{title}}
&nbsp;&nbsp;
<a href="#" class="header-toggle"></a>
<br/>
<small>
<a href="$url" class="header-url" title="$url">
$url_str
<a href="{{url}}" class="header-url" title="{{url}}">
{{url_str}}
</a>
</small>
</div>
@ -266,41 +266,41 @@
<div class="col-lg-4">
<div title="Date bookmarked or imported" class="info-chunk">
<h5>Added</h5>
$bookmarked_date
{{bookmarked_date}}
</div>
<div title="Date first archived" class="info-chunk">
<h5>First Archived</h5>
$oldest_archive_date
{{oldest_archive_date}}
</div>
<div title="Date last checked" class="info-chunk">
<h5>Last Checked</h5>
$updated_date
{{updated_date}}
</div>
</div>
<div class="col-lg-4">
<div class="info-chunk">
<h5>Type</h5>
<div class="badge badge-default">$extension</div>
<div class="badge badge-default">{{extension}}</div>
</div>
<div class="info-chunk">
<h5>Tags</h5>
<div class="badge badge-warning">$tags</div>
<div class="badge badge-warning">{{tags}}</div>
</div>
<div class="info-chunk">
<h5>Status</h5>
<div class="badge badge-$status_color">$status</div>
<div class="badge badge-{{status_color}}">{{status}}</div>
</div>
<div class="info-chunk">
<h5>Saved</h5>
$num_outputs
{{num_outputs}}
</div>
<div class="info-chunk">
<h5>Errors</h5>
$num_failures
{{num_failures}}
</div>
<div class="info-chunk">
<h5>Size</h5>
$size
{{size}}
</div>
</div>
<div class="col-lg-4">
@ -318,97 +318,97 @@
<div class="row header-bottom-frames">
<div class="col-lg-3">
<div class="card selected-card">
<iframe class="card-img-top" src="$archive_url" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
<iframe class="card-img-top" src="{{archive_url}}" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
<div class="card-body">
<a href="$archive_url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<a href="{{archive_url}}" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$archive_url" target="preview"><h4 class="card-title">Wget &gt; WARC</h4></a>
<p class="card-text">archive/$domain</p>
<a href="{{archive_url}}" target="preview"><h4 class="card-title">Wget &gt; WARC</h4></a>
<p class="card-text">archive/{{domain}}</p>
</div>
</div>
</div>
<div class="col-lg-3">
<div class="card">
<iframe class="card-img-top" src="$singlefile_path" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
<iframe class="card-img-top" src="{{singlefile_path}}" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
<div class="card-body">
<a href="$singlefile_path" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<a href="{{singlefile_path}}" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$singlefile_path" target="preview"><h4 class="card-title">Chrome &gt; SingleFile</h4></a>
<a href="{{singlefile_path}}" target="preview"><h4 class="card-title">Chrome &gt; SingleFile</h4></a>
<p class="card-text">archive/singlefile.html</p>
</div>
</div>
</div>
<div class="col-lg-3">
<div class="card">
<iframe class="card-img-top" src="$archive_org_path" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
<iframe class="card-img-top" src="{{archive_org_path}}" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
<div class="card-body">
<a href="$archive_org_path" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<a href="{{archive_org_path}}" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$archive_org_path" target="preview"><h4 class="card-title">Archive.Org</h4></a>
<a href="{{archive_org_path}}" target="preview"><h4 class="card-title">Archive.Org</h4></a>
<p class="card-text">web.archive.org/web/...</p>
</div>
</div>
</div>
<div class="col-lg-3">
<div class="card">
<iframe class="card-img-top" src="$url" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
<iframe class="card-img-top" src="{{url}}" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
<div class="card-body">
<a href="$url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<a href="{{url}}" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$url" target="preview"><h4 class="card-title">Original</h4></a>
<p class="card-text">$domain</p>
<a href="{{url}}" target="preview"><h4 class="card-title">Original</h4></a>
<p class="card-text">{{domain}}</p>
</div>
</div>
</div>
<br/>
<div class="col-lg-3">
<div class="card">
<iframe class="card-img-top pdf-frame" src="$pdf_path" scrolling="no"></iframe>
<iframe class="card-img-top pdf-frame" src="{{pdf_path}}" scrolling="no"></iframe>
<div class="card-body">
<a href="$pdf_path" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<a href="{{pdf_path}}" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$pdf_path" target="preview" id="pdf-btn"><h4 class="card-title">Chrome &gt; PDF</h4></a>
<a href="{{pdf_path}}" target="preview" id="pdf-btn"><h4 class="card-title">Chrome &gt; PDF</h4></a>
<p class="card-text">archive/output.pdf</p>
</div>
</div>
</div>
<div class="col-lg-3">
<div class="card">
<img class="card-img-top screenshot" src="$screenshot_path"></iframe>
<img class="card-img-top screenshot" src="{{screenshot_path}}"></iframe>
<div class="card-body">
<a href="$screenshot_path" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<a href="{{screenshot_path}}" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$screenshot_path" target="preview"><h4 class="card-title">Chrome &gt; Screenshot</h4></a>
<a href="{{screenshot_path}}" target="preview"><h4 class="card-title">Chrome &gt; Screenshot</h4></a>
<p class="card-text">archive/screenshot.png</p>
</div>
</div>
</div>
<div class="col-lg-3">
<div class="card">
<iframe class="card-img-top" src="$dom_path" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
<iframe class="card-img-top" src="{{dom_path}}" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
<div class="card-body">
<a href="$dom_path" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<a href="{{dom_path}}" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$dom_path" target="preview"><h4 class="card-title">Chrome &gt; HTML</h4></a>
<a href="{{dom_path}}" target="preview"><h4 class="card-title">Chrome &gt; HTML</h4></a>
<p class="card-text">archive/output.html</p>
</div>
</div>
</div>
<div class="col-lg-3">
<div class="card">
<iframe class="card-img-top" src="$readability_path" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
<iframe class="card-img-top" src="{{readability_path}}" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
<div class="card-body">
<a href="$readability_path" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<a href="{{readability_path}}" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$readability_path" target="preview"><h4 class="card-title">Readability</h4></a>
<a href="{{readability_path}}" target="preview"><h4 class="card-title">Readability</h4></a>
<p class="card-text">archive/readability/...</p>
</div>
</div>
@ -416,12 +416,12 @@
<br/>
<div class="col-lg-3">
<div class="card">
<iframe class="card-img-top" src="$mercury_path" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
<iframe class="card-img-top" src="{{mercury_path}}" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
<div class="card-body">
<a href="$mercury_path" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<a href="{{mercury_path}}" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$mercury_path" target="preview"><h4 class="card-title">mercury</h4></a>
<a href="{{mercury_path}}" target="preview"><h4 class="card-title">mercury</h4></a>
<p class="card-text">archive/mercury/...</p>
</div>
</div>
@ -429,7 +429,7 @@
</div>
</div>
</header>
<iframe sandbox="allow-same-origin allow-scripts allow-forms" class="full-page-iframe" src="$archive_url" name="preview"></iframe>
<iframe sandbox="allow-same-origin allow-scripts allow-forms" class="full-page-iframe" src="{{archive_url}}" name="preview"></iframe>
<script
src="https://code.jquery.com/jquery-3.2.1.slim.min.js"

View file

@ -161,12 +161,6 @@
.in-progress {
display: none;
}
body[data-status~=finished] .files-spinner {
display: none;
}
/*body[data-status~=running] .in-progress {
display: inline-block;
}*/
tr td a.favicon img {
padding-left: 6px;
padding-right: 12px;
@ -210,7 +204,7 @@
});
</script>
</head>
<body data-status="finished">
<body>
<header>
<div class="header-top container-fluid">
<div class="row nav">
@ -228,6 +222,7 @@
</div>
</div>
</header>
<table id="table-bookmarks">
<thead>
<tr>
@ -239,26 +234,7 @@
</thead>
<tbody>
{% for link in links %}
<tr>
<td title="{{link.timestamp}}">{{link.bookmarked_date}}</td>
<td class="title-col">
{% if link.is_archived %}
<a href="archive/{{link.timestamp}}/index.html"><img src="archive/{{link.timestamp}}/favicon.ico" class="link-favicon" decoding="async"></a>
{% else %}
<a href="archive/{{link.timestamp}}/index.html"><img src="{% static 'spinner.gif' %}" class="link-favicon" decoding="async"></a>
{% endif %}
<a href="archive/{{link.timestamp}}/{{link.canonical_outputs.wget_path}}" title="{{link.title}}">
<span data-title-for="{{link.url}}" data-archived="{{link.is_archived}}">{{link.title|default:'Loading...'}}</span>
<small style="float:right">{{link.tags|default:''}}</small>
</a>
</td>
<td>
<a href="archive/{{link.timestamp}}/index.html">📄
<span data-number-for="{{link.url}}" title="Fetching any missing files...">{{link.num_outputs}} <img src="{% static 'spinner.gif' %}" class="files-spinner" decoding="async"/></span>
</a>
</td>
<td style="text-align:left"><a href="{{link.url}}">{{link.url}}</a></td>
</tr>
{% include 'main_index_row.html' with link=link %}
{% endfor %}
</tbody>
</table>

View file

@ -4,17 +4,21 @@
<title>Archived Sites</title>
<meta charset="utf-8" name="viewport" content="width=device-width, initial-scale=1">
</head>
<body data-status="$status">
<body data-status="{{status}}">
<table id="table-bookmarks">
<thead>
<tr class="thead-tr">
<th style="width: 100px;">Bookmarked</th>
<th style="width: 26vw;">Saved Link ($num_links)</th>
<th style="width: 26vw;">Saved Link ({{num_links}})</th>
<th style="width: 50px">Files</th>
<th style="width: 16vw;whitespace:nowrap;overflow-x:hidden;">Original URL</th>
</tr>
</thead>
<tbody>$rows</tbody>
<tbody>
{% for link in links %}
{% include "main_index_row.html" with link=link %}
{% endfor %}
</tbody>
</table>
</body>
</html>

View file

@ -0,0 +1,22 @@
{% load static %}
<tr>
<td title="{{link.timestamp}}"> {% if link.bookmarked_date %} {{ link.bookmarked_date }} {% else %} {{ link.added }} {% endif %} </td>
<td class="title-col">
{% if link.is_archived %}
<a href="archive/{{link.timestamp}}/index.html"><img src="archive/{{link.timestamp}}/favicon.ico" class="link-favicon" decoding="async"></a>
{% else %}
<a href="archive/{{link.timestamp}}/index.html"><img src="{% static 'spinner.gif' %}" class="link-favicon" decoding="async"></a>
{% endif %}
<a href="archive/{{link.timestamp}}/{{link.canonical_outputs.wget_path}}" title="{{link.title}}">
<span data-title-for="{{link.url}}" data-archived="{{link.is_archived}}">{{link.title|default:'Loading...'}}</span>
<small style="float:right">{% if link.tags_str != None %} {{link.tags_str|default:''}} {% else %} {{ link.tags|default:'' }} {% endif %}</small>
</a>
</td>
<td>
<a href="archive/{{link.timestamp}}/index.html">📄
<span data-number-for="{{link.url}}" title="Fetching any missing files...">{% if link.icons %} {{link.icons}} {% else %} {{ link.num_outputs}} {% endif %}<img src="{% static 'spinner.gif' %}" class="files-spinner" decoding="async"/></span>
</a>
</td>
<td style="text-align:left"><a href="{{link.url}}">{{link.url}}</a></td>
</tr>

View file

@ -91,6 +91,7 @@ body.model-snapshot.change-list #content .object-tools {
padding: 0px;
background: none;
margin-right: 0px;
width: auto;
}
#content #changelist .actions .button {

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

View file

@ -127,12 +127,6 @@
.in-progress {
display: none;
}
body[data-status~=finished] .files-spinner {
display: none;
}
/*body[data-status~=running] .in-progress {
display: inline-block;
}*/
tr td a.favicon img {
padding-left: 6px;
padding-right: 12px;
@ -176,7 +170,7 @@
});
</script>
</head>
<body data-status="$status">
<body>
<header>
<div class="header-top container-fluid">
<div class="row nav">
@ -198,7 +192,7 @@
<thead>
<tr class="thead-tr">
<th style="width: 100px;">Bookmarked</th>
<th style="width: 26vw;">Saved Link ($num_links)</th>
<th style="width: 26vw;">Snapshot ($num_links)</th>
<th style="width: 50px">Files</th>
<th style="width: 16vw;whitespace:nowrap;overflow-x:hidden;">Original URL</th>
</tr>
@ -209,9 +203,8 @@
<br/>
<center>
<small>
Archive created using <a href="https://github.com/ArchiveBox/ArchiveBox" title="Github">ArchiveBox</a>
version <a href="https://github.com/ArchiveBox/ArchiveBox/tree/v$version" title="Git commit">v$version</a> &nbsp; | &nbsp;
Download index as <a href="index.json" title="JSON summary of archived links.">JSON</a>
Created using <a href="https://github.com/ArchiveBox/ArchiveBox" title="Github">ArchiveBox</a>
version <a href="https://github.com/ArchiveBox/ArchiveBox/releases" title="Releases">v$version</a>.
<br/><br/>
$footer_info
</small>

View file

@ -9,7 +9,7 @@
</td>
<td>
<a href="$archive_path/index.html">📄
<span data-number-for="$url" title="Fetching any missing files...">$num_outputs <img src="static/spinner.gif" class="files-spinner" decoding="async"/></span>
<span data-number-for="$url" title="Number of extractor outputs">$num_outputs</span>
</a>
</td>
<td style="text-align:left"><a href="$url">$url</a></td>

View file

@ -1,2 +0,0 @@
User-agent: *
Disallow: /

Binary file not shown.

Before

Width:  |  Height:  |  Size: 17 KiB

File diff suppressed because one or more lines are too long

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.6 KiB

File diff suppressed because one or more lines are too long

View file

@ -1,166 +0,0 @@
/*!
DataTables 1.10.19
©2008-2018 SpryMedia Ltd - datatables.net/license
*/
(function(h){"function"===typeof define&&define.amd?define(["jquery"],function(E){return h(E,window,document)}):"object"===typeof exports?module.exports=function(E,H){E||(E=window);H||(H="undefined"!==typeof window?require("jquery"):require("jquery")(E));return h(H,E,E.document)}:h(jQuery,window,document)})(function(h,E,H,k){function Z(a){var b,c,d={};h.each(a,function(e){if((b=e.match(/^([^A-Z]+?)([A-Z])/))&&-1!=="a aa ai ao as b fn i m o s ".indexOf(b[1]+" "))c=e.replace(b[0],b[2].toLowerCase()),
d[c]=e,"o"===b[1]&&Z(a[e])});a._hungarianMap=d}function J(a,b,c){a._hungarianMap||Z(a);var d;h.each(b,function(e){d=a._hungarianMap[e];if(d!==k&&(c||b[d]===k))"o"===d.charAt(0)?(b[d]||(b[d]={}),h.extend(!0,b[d],b[e]),J(a[d],b[d],c)):b[d]=b[e]})}function Ca(a){var b=n.defaults.oLanguage,c=b.sDecimal;c&&Da(c);if(a){var d=a.sZeroRecords;!a.sEmptyTable&&(d&&"No data available in table"===b.sEmptyTable)&&F(a,a,"sZeroRecords","sEmptyTable");!a.sLoadingRecords&&(d&&"Loading..."===b.sLoadingRecords)&&F(a,
a,"sZeroRecords","sLoadingRecords");a.sInfoThousands&&(a.sThousands=a.sInfoThousands);(a=a.sDecimal)&&c!==a&&Da(a)}}function fb(a){A(a,"ordering","bSort");A(a,"orderMulti","bSortMulti");A(a,"orderClasses","bSortClasses");A(a,"orderCellsTop","bSortCellsTop");A(a,"order","aaSorting");A(a,"orderFixed","aaSortingFixed");A(a,"paging","bPaginate");A(a,"pagingType","sPaginationType");A(a,"pageLength","iDisplayLength");A(a,"searching","bFilter");"boolean"===typeof a.sScrollX&&(a.sScrollX=a.sScrollX?"100%":
"");"boolean"===typeof a.scrollX&&(a.scrollX=a.scrollX?"100%":"");if(a=a.aoSearchCols)for(var b=0,c=a.length;b<c;b++)a[b]&&J(n.models.oSearch,a[b])}function gb(a){A(a,"orderable","bSortable");A(a,"orderData","aDataSort");A(a,"orderSequence","asSorting");A(a,"orderDataType","sortDataType");var b=a.aDataSort;"number"===typeof b&&!h.isArray(b)&&(a.aDataSort=[b])}function hb(a){if(!n.__browser){var b={};n.__browser=b;var c=h("<div/>").css({position:"fixed",top:0,left:-1*h(E).scrollLeft(),height:1,width:1,
overflow:"hidden"}).append(h("<div/>").css({position:"absolute",top:1,left:1,width:100,overflow:"scroll"}).append(h("<div/>").css({width:"100%",height:10}))).appendTo("body"),d=c.children(),e=d.children();b.barWidth=d[0].offsetWidth-d[0].clientWidth;b.bScrollOversize=100===e[0].offsetWidth&&100!==d[0].clientWidth;b.bScrollbarLeft=1!==Math.round(e.offset().left);b.bBounding=c[0].getBoundingClientRect().width?!0:!1;c.remove()}h.extend(a.oBrowser,n.__browser);a.oScroll.iBarWidth=n.__browser.barWidth}
function ib(a,b,c,d,e,f){var g,j=!1;c!==k&&(g=c,j=!0);for(;d!==e;)a.hasOwnProperty(d)&&(g=j?b(g,a[d],d,a):a[d],j=!0,d+=f);return g}function Ea(a,b){var c=n.defaults.column,d=a.aoColumns.length,c=h.extend({},n.models.oColumn,c,{nTh:b?b:H.createElement("th"),sTitle:c.sTitle?c.sTitle:b?b.innerHTML:"",aDataSort:c.aDataSort?c.aDataSort:[d],mData:c.mData?c.mData:d,idx:d});a.aoColumns.push(c);c=a.aoPreSearchCols;c[d]=h.extend({},n.models.oSearch,c[d]);ka(a,d,h(b).data())}function ka(a,b,c){var b=a.aoColumns[b],
d=a.oClasses,e=h(b.nTh);if(!b.sWidthOrig){b.sWidthOrig=e.attr("width")||null;var f=(e.attr("style")||"").match(/width:\s*(\d+[pxem%]+)/);f&&(b.sWidthOrig=f[1])}c!==k&&null!==c&&(gb(c),J(n.defaults.column,c),c.mDataProp!==k&&!c.mData&&(c.mData=c.mDataProp),c.sType&&(b._sManualType=c.sType),c.className&&!c.sClass&&(c.sClass=c.className),c.sClass&&e.addClass(c.sClass),h.extend(b,c),F(b,c,"sWidth","sWidthOrig"),c.iDataSort!==k&&(b.aDataSort=[c.iDataSort]),F(b,c,"aDataSort"));var g=b.mData,j=S(g),i=b.mRender?
S(b.mRender):null,c=function(a){return"string"===typeof a&&-1!==a.indexOf("@")};b._bAttrSrc=h.isPlainObject(g)&&(c(g.sort)||c(g.type)||c(g.filter));b._setter=null;b.fnGetData=function(a,b,c){var d=j(a,b,k,c);return i&&b?i(d,b,a,c):d};b.fnSetData=function(a,b,c){return N(g)(a,b,c)};"number"!==typeof g&&(a._rowReadObject=!0);a.oFeatures.bSort||(b.bSortable=!1,e.addClass(d.sSortableNone));a=-1!==h.inArray("asc",b.asSorting);c=-1!==h.inArray("desc",b.asSorting);!b.bSortable||!a&&!c?(b.sSortingClass=d.sSortableNone,
b.sSortingClassJUI=""):a&&!c?(b.sSortingClass=d.sSortableAsc,b.sSortingClassJUI=d.sSortJUIAscAllowed):!a&&c?(b.sSortingClass=d.sSortableDesc,b.sSortingClassJUI=d.sSortJUIDescAllowed):(b.sSortingClass=d.sSortable,b.sSortingClassJUI=d.sSortJUI)}function $(a){if(!1!==a.oFeatures.bAutoWidth){var b=a.aoColumns;Fa(a);for(var c=0,d=b.length;c<d;c++)b[c].nTh.style.width=b[c].sWidth}b=a.oScroll;(""!==b.sY||""!==b.sX)&&la(a);r(a,null,"column-sizing",[a])}function aa(a,b){var c=ma(a,"bVisible");return"number"===
typeof c[b]?c[b]:null}function ba(a,b){var c=ma(a,"bVisible"),c=h.inArray(b,c);return-1!==c?c:null}function V(a){var b=0;h.each(a.aoColumns,function(a,d){d.bVisible&&"none"!==h(d.nTh).css("display")&&b++});return b}function ma(a,b){var c=[];h.map(a.aoColumns,function(a,e){a[b]&&c.push(e)});return c}function Ga(a){var b=a.aoColumns,c=a.aoData,d=n.ext.type.detect,e,f,g,j,i,h,l,q,t;e=0;for(f=b.length;e<f;e++)if(l=b[e],t=[],!l.sType&&l._sManualType)l.sType=l._sManualType;else if(!l.sType){g=0;for(j=d.length;g<
j;g++){i=0;for(h=c.length;i<h;i++){t[i]===k&&(t[i]=B(a,i,e,"type"));q=d[g](t[i],a);if(!q&&g!==d.length-1)break;if("html"===q)break}if(q){l.sType=q;break}}l.sType||(l.sType="string")}}function jb(a,b,c,d){var e,f,g,j,i,m,l=a.aoColumns;if(b)for(e=b.length-1;0<=e;e--){m=b[e];var q=m.targets!==k?m.targets:m.aTargets;h.isArray(q)||(q=[q]);f=0;for(g=q.length;f<g;f++)if("number"===typeof q[f]&&0<=q[f]){for(;l.length<=q[f];)Ea(a);d(q[f],m)}else if("number"===typeof q[f]&&0>q[f])d(l.length+q[f],m);else if("string"===
typeof q[f]){j=0;for(i=l.length;j<i;j++)("_all"==q[f]||h(l[j].nTh).hasClass(q[f]))&&d(j,m)}}if(c){e=0;for(a=c.length;e<a;e++)d(e,c[e])}}function O(a,b,c,d){var e=a.aoData.length,f=h.extend(!0,{},n.models.oRow,{src:c?"dom":"data",idx:e});f._aData=b;a.aoData.push(f);for(var g=a.aoColumns,j=0,i=g.length;j<i;j++)g[j].sType=null;a.aiDisplayMaster.push(e);b=a.rowIdFn(b);b!==k&&(a.aIds[b]=f);(c||!a.oFeatures.bDeferRender)&&Ha(a,e,c,d);return e}function na(a,b){var c;b instanceof h||(b=h(b));return b.map(function(b,
e){c=Ia(a,e);return O(a,c.data,e,c.cells)})}function B(a,b,c,d){var e=a.iDraw,f=a.aoColumns[c],g=a.aoData[b]._aData,j=f.sDefaultContent,i=f.fnGetData(g,d,{settings:a,row:b,col:c});if(i===k)return a.iDrawError!=e&&null===j&&(K(a,0,"Requested unknown parameter "+("function"==typeof f.mData?"{function}":"'"+f.mData+"'")+" for row "+b+", column "+c,4),a.iDrawError=e),j;if((i===g||null===i)&&null!==j&&d!==k)i=j;else if("function"===typeof i)return i.call(g);return null===i&&"display"==d?"":i}function kb(a,
b,c,d){a.aoColumns[c].fnSetData(a.aoData[b]._aData,d,{settings:a,row:b,col:c})}function Ja(a){return h.map(a.match(/(\\.|[^\.])+/g)||[""],function(a){return a.replace(/\\\./g,".")})}function S(a){if(h.isPlainObject(a)){var b={};h.each(a,function(a,c){c&&(b[a]=S(c))});return function(a,c,f,g){var j=b[c]||b._;return j!==k?j(a,c,f,g):a}}if(null===a)return function(a){return a};if("function"===typeof a)return function(b,c,f,g){return a(b,c,f,g)};if("string"===typeof a&&(-1!==a.indexOf(".")||-1!==a.indexOf("[")||
-1!==a.indexOf("("))){var c=function(a,b,f){var g,j;if(""!==f){j=Ja(f);for(var i=0,m=j.length;i<m;i++){f=j[i].match(ca);g=j[i].match(W);if(f){j[i]=j[i].replace(ca,"");""!==j[i]&&(a=a[j[i]]);g=[];j.splice(0,i+1);j=j.join(".");if(h.isArray(a)){i=0;for(m=a.length;i<m;i++)g.push(c(a[i],b,j))}a=f[0].substring(1,f[0].length-1);a=""===a?g:g.join(a);break}else if(g){j[i]=j[i].replace(W,"");a=a[j[i]]();continue}if(null===a||a[j[i]]===k)return k;a=a[j[i]]}}return a};return function(b,e){return c(b,e,a)}}return function(b){return b[a]}}
function N(a){if(h.isPlainObject(a))return N(a._);if(null===a)return function(){};if("function"===typeof a)return function(b,d,e){a(b,"set",d,e)};if("string"===typeof a&&(-1!==a.indexOf(".")||-1!==a.indexOf("[")||-1!==a.indexOf("("))){var b=function(a,d,e){var e=Ja(e),f;f=e[e.length-1];for(var g,j,i=0,m=e.length-1;i<m;i++){g=e[i].match(ca);j=e[i].match(W);if(g){e[i]=e[i].replace(ca,"");a[e[i]]=[];f=e.slice();f.splice(0,i+1);g=f.join(".");if(h.isArray(d)){j=0;for(m=d.length;j<m;j++)f={},b(f,d[j],g),
a[e[i]].push(f)}else a[e[i]]=d;return}j&&(e[i]=e[i].replace(W,""),a=a[e[i]](d));if(null===a[e[i]]||a[e[i]]===k)a[e[i]]={};a=a[e[i]]}if(f.match(W))a[f.replace(W,"")](d);else a[f.replace(ca,"")]=d};return function(c,d){return b(c,d,a)}}return function(b,d){b[a]=d}}function Ka(a){return D(a.aoData,"_aData")}function oa(a){a.aoData.length=0;a.aiDisplayMaster.length=0;a.aiDisplay.length=0;a.aIds={}}function pa(a,b,c){for(var d=-1,e=0,f=a.length;e<f;e++)a[e]==b?d=e:a[e]>b&&a[e]--; -1!=d&&c===k&&a.splice(d,
1)}function da(a,b,c,d){var e=a.aoData[b],f,g=function(c,d){for(;c.childNodes.length;)c.removeChild(c.firstChild);c.innerHTML=B(a,b,d,"display")};if("dom"===c||(!c||"auto"===c)&&"dom"===e.src)e._aData=Ia(a,e,d,d===k?k:e._aData).data;else{var j=e.anCells;if(j)if(d!==k)g(j[d],d);else{c=0;for(f=j.length;c<f;c++)g(j[c],c)}}e._aSortData=null;e._aFilterData=null;g=a.aoColumns;if(d!==k)g[d].sType=null;else{c=0;for(f=g.length;c<f;c++)g[c].sType=null;La(a,e)}}function Ia(a,b,c,d){var e=[],f=b.firstChild,g,
j,i=0,m,l=a.aoColumns,q=a._rowReadObject,d=d!==k?d:q?{}:[],t=function(a,b){if("string"===typeof a){var c=a.indexOf("@");-1!==c&&(c=a.substring(c+1),N(a)(d,b.getAttribute(c)))}},G=function(a){if(c===k||c===i)j=l[i],m=h.trim(a.innerHTML),j&&j._bAttrSrc?(N(j.mData._)(d,m),t(j.mData.sort,a),t(j.mData.type,a),t(j.mData.filter,a)):q?(j._setter||(j._setter=N(j.mData)),j._setter(d,m)):d[i]=m;i++};if(f)for(;f;){g=f.nodeName.toUpperCase();if("TD"==g||"TH"==g)G(f),e.push(f);f=f.nextSibling}else{e=b.anCells;
f=0;for(g=e.length;f<g;f++)G(e[f])}if(b=b.firstChild?b:b.nTr)(b=b.getAttribute("id"))&&N(a.rowId)(d,b);return{data:d,cells:e}}function Ha(a,b,c,d){var e=a.aoData[b],f=e._aData,g=[],j,i,m,l,q;if(null===e.nTr){j=c||H.createElement("tr");e.nTr=j;e.anCells=g;j._DT_RowIndex=b;La(a,e);l=0;for(q=a.aoColumns.length;l<q;l++){m=a.aoColumns[l];i=c?d[l]:H.createElement(m.sCellType);i._DT_CellIndex={row:b,column:l};g.push(i);if((!c||m.mRender||m.mData!==l)&&(!h.isPlainObject(m.mData)||m.mData._!==l+".display"))i.innerHTML=
B(a,b,l,"display");m.sClass&&(i.className+=" "+m.sClass);m.bVisible&&!c?j.appendChild(i):!m.bVisible&&c&&i.parentNode.removeChild(i);m.fnCreatedCell&&m.fnCreatedCell.call(a.oInstance,i,B(a,b,l),f,b,l)}r(a,"aoRowCreatedCallback",null,[j,f,b,g])}e.nTr.setAttribute("role","row")}function La(a,b){var c=b.nTr,d=b._aData;if(c){var e=a.rowIdFn(d);e&&(c.id=e);d.DT_RowClass&&(e=d.DT_RowClass.split(" "),b.__rowc=b.__rowc?qa(b.__rowc.concat(e)):e,h(c).removeClass(b.__rowc.join(" ")).addClass(d.DT_RowClass));
d.DT_RowAttr&&h(c).attr(d.DT_RowAttr);d.DT_RowData&&h(c).data(d.DT_RowData)}}function lb(a){var b,c,d,e,f,g=a.nTHead,j=a.nTFoot,i=0===h("th, td",g).length,m=a.oClasses,l=a.aoColumns;i&&(e=h("<tr/>").appendTo(g));b=0;for(c=l.length;b<c;b++)f=l[b],d=h(f.nTh).addClass(f.sClass),i&&d.appendTo(e),a.oFeatures.bSort&&(d.addClass(f.sSortingClass),!1!==f.bSortable&&(d.attr("tabindex",a.iTabIndex).attr("aria-controls",a.sTableId),Ma(a,f.nTh,b))),f.sTitle!=d[0].innerHTML&&d.html(f.sTitle),Na(a,"header")(a,d,
f,m);i&&ea(a.aoHeader,g);h(g).find(">tr").attr("role","row");h(g).find(">tr>th, >tr>td").addClass(m.sHeaderTH);h(j).find(">tr>th, >tr>td").addClass(m.sFooterTH);if(null!==j){a=a.aoFooter[0];b=0;for(c=a.length;b<c;b++)f=l[b],f.nTf=a[b].cell,f.sClass&&h(f.nTf).addClass(f.sClass)}}function fa(a,b,c){var d,e,f,g=[],j=[],i=a.aoColumns.length,m;if(b){c===k&&(c=!1);d=0;for(e=b.length;d<e;d++){g[d]=b[d].slice();g[d].nTr=b[d].nTr;for(f=i-1;0<=f;f--)!a.aoColumns[f].bVisible&&!c&&g[d].splice(f,1);j.push([])}d=
0;for(e=g.length;d<e;d++){if(a=g[d].nTr)for(;f=a.firstChild;)a.removeChild(f);f=0;for(b=g[d].length;f<b;f++)if(m=i=1,j[d][f]===k){a.appendChild(g[d][f].cell);for(j[d][f]=1;g[d+i]!==k&&g[d][f].cell==g[d+i][f].cell;)j[d+i][f]=1,i++;for(;g[d][f+m]!==k&&g[d][f].cell==g[d][f+m].cell;){for(c=0;c<i;c++)j[d+c][f+m]=1;m++}h(g[d][f].cell).attr("rowspan",i).attr("colspan",m)}}}}function P(a){var b=r(a,"aoPreDrawCallback","preDraw",[a]);if(-1!==h.inArray(!1,b))C(a,!1);else{var b=[],c=0,d=a.asStripeClasses,e=
d.length,f=a.oLanguage,g=a.iInitDisplayStart,j="ssp"==y(a),i=a.aiDisplay;a.bDrawing=!0;g!==k&&-1!==g&&(a._iDisplayStart=j?g:g>=a.fnRecordsDisplay()?0:g,a.iInitDisplayStart=-1);var g=a._iDisplayStart,m=a.fnDisplayEnd();if(a.bDeferLoading)a.bDeferLoading=!1,a.iDraw++,C(a,!1);else if(j){if(!a.bDestroying&&!mb(a))return}else a.iDraw++;if(0!==i.length){f=j?a.aoData.length:m;for(j=j?0:g;j<f;j++){var l=i[j],q=a.aoData[l];null===q.nTr&&Ha(a,l);var t=q.nTr;if(0!==e){var G=d[c%e];q._sRowStripe!=G&&(h(t).removeClass(q._sRowStripe).addClass(G),
q._sRowStripe=G)}r(a,"aoRowCallback",null,[t,q._aData,c,j,l]);b.push(t);c++}}else c=f.sZeroRecords,1==a.iDraw&&"ajax"==y(a)?c=f.sLoadingRecords:f.sEmptyTable&&0===a.fnRecordsTotal()&&(c=f.sEmptyTable),b[0]=h("<tr/>",{"class":e?d[0]:""}).append(h("<td />",{valign:"top",colSpan:V(a),"class":a.oClasses.sRowEmpty}).html(c))[0];r(a,"aoHeaderCallback","header",[h(a.nTHead).children("tr")[0],Ka(a),g,m,i]);r(a,"aoFooterCallback","footer",[h(a.nTFoot).children("tr")[0],Ka(a),g,m,i]);d=h(a.nTBody);d.children().detach();
d.append(h(b));r(a,"aoDrawCallback","draw",[a]);a.bSorted=!1;a.bFiltered=!1;a.bDrawing=!1}}function T(a,b){var c=a.oFeatures,d=c.bFilter;c.bSort&&nb(a);d?ga(a,a.oPreviousSearch):a.aiDisplay=a.aiDisplayMaster.slice();!0!==b&&(a._iDisplayStart=0);a._drawHold=b;P(a);a._drawHold=!1}function ob(a){var b=a.oClasses,c=h(a.nTable),c=h("<div/>").insertBefore(c),d=a.oFeatures,e=h("<div/>",{id:a.sTableId+"_wrapper","class":b.sWrapper+(a.nTFoot?"":" "+b.sNoFooter)});a.nHolding=c[0];a.nTableWrapper=e[0];a.nTableReinsertBefore=
a.nTable.nextSibling;for(var f=a.sDom.split(""),g,j,i,m,l,q,k=0;k<f.length;k++){g=null;j=f[k];if("<"==j){i=h("<div/>")[0];m=f[k+1];if("'"==m||'"'==m){l="";for(q=2;f[k+q]!=m;)l+=f[k+q],q++;"H"==l?l=b.sJUIHeader:"F"==l&&(l=b.sJUIFooter);-1!=l.indexOf(".")?(m=l.split("."),i.id=m[0].substr(1,m[0].length-1),i.className=m[1]):"#"==l.charAt(0)?i.id=l.substr(1,l.length-1):i.className=l;k+=q}e.append(i);e=h(i)}else if(">"==j)e=e.parent();else if("l"==j&&d.bPaginate&&d.bLengthChange)g=pb(a);else if("f"==j&&
d.bFilter)g=qb(a);else if("r"==j&&d.bProcessing)g=rb(a);else if("t"==j)g=sb(a);else if("i"==j&&d.bInfo)g=tb(a);else if("p"==j&&d.bPaginate)g=ub(a);else if(0!==n.ext.feature.length){i=n.ext.feature;q=0;for(m=i.length;q<m;q++)if(j==i[q].cFeature){g=i[q].fnInit(a);break}}g&&(i=a.aanFeatures,i[j]||(i[j]=[]),i[j].push(g),e.append(g))}c.replaceWith(e);a.nHolding=null}function ea(a,b){var c=h(b).children("tr"),d,e,f,g,j,i,m,l,q,k;a.splice(0,a.length);f=0;for(i=c.length;f<i;f++)a.push([]);f=0;for(i=c.length;f<
i;f++){d=c[f];for(e=d.firstChild;e;){if("TD"==e.nodeName.toUpperCase()||"TH"==e.nodeName.toUpperCase()){l=1*e.getAttribute("colspan");q=1*e.getAttribute("rowspan");l=!l||0===l||1===l?1:l;q=!q||0===q||1===q?1:q;g=0;for(j=a[f];j[g];)g++;m=g;k=1===l?!0:!1;for(j=0;j<l;j++)for(g=0;g<q;g++)a[f+g][m+j]={cell:e,unique:k},a[f+g].nTr=d}e=e.nextSibling}}}function ra(a,b,c){var d=[];c||(c=a.aoHeader,b&&(c=[],ea(c,b)));for(var b=0,e=c.length;b<e;b++)for(var f=0,g=c[b].length;f<g;f++)if(c[b][f].unique&&(!d[f]||
!a.bSortCellsTop))d[f]=c[b][f].cell;return d}function sa(a,b,c){r(a,"aoServerParams","serverParams",[b]);if(b&&h.isArray(b)){var d={},e=/(.*?)\[\]$/;h.each(b,function(a,b){var c=b.name.match(e);c?(c=c[0],d[c]||(d[c]=[]),d[c].push(b.value)):d[b.name]=b.value});b=d}var f,g=a.ajax,j=a.oInstance,i=function(b){r(a,null,"xhr",[a,b,a.jqXHR]);c(b)};if(h.isPlainObject(g)&&g.data){f=g.data;var m="function"===typeof f?f(b,a):f,b="function"===typeof f&&m?m:h.extend(!0,b,m);delete g.data}m={data:b,success:function(b){var c=
b.error||b.sError;c&&K(a,0,c);a.json=b;i(b)},dataType:"json",cache:!1,type:a.sServerMethod,error:function(b,c){var d=r(a,null,"xhr",[a,null,a.jqXHR]);-1===h.inArray(!0,d)&&("parsererror"==c?K(a,0,"Invalid JSON response",1):4===b.readyState&&K(a,0,"Ajax error",7));C(a,!1)}};a.oAjaxData=b;r(a,null,"preXhr",[a,b]);a.fnServerData?a.fnServerData.call(j,a.sAjaxSource,h.map(b,function(a,b){return{name:b,value:a}}),i,a):a.sAjaxSource||"string"===typeof g?a.jqXHR=h.ajax(h.extend(m,{url:g||a.sAjaxSource})):
"function"===typeof g?a.jqXHR=g.call(j,b,i,a):(a.jqXHR=h.ajax(h.extend(m,g)),g.data=f)}function mb(a){return a.bAjaxDataGet?(a.iDraw++,C(a,!0),sa(a,vb(a),function(b){wb(a,b)}),!1):!0}function vb(a){var b=a.aoColumns,c=b.length,d=a.oFeatures,e=a.oPreviousSearch,f=a.aoPreSearchCols,g,j=[],i,m,l,k=X(a);g=a._iDisplayStart;i=!1!==d.bPaginate?a._iDisplayLength:-1;var t=function(a,b){j.push({name:a,value:b})};t("sEcho",a.iDraw);t("iColumns",c);t("sColumns",D(b,"sName").join(","));t("iDisplayStart",g);t("iDisplayLength",
i);var G={draw:a.iDraw,columns:[],order:[],start:g,length:i,search:{value:e.sSearch,regex:e.bRegex}};for(g=0;g<c;g++)m=b[g],l=f[g],i="function"==typeof m.mData?"function":m.mData,G.columns.push({data:i,name:m.sName,searchable:m.bSearchable,orderable:m.bSortable,search:{value:l.sSearch,regex:l.bRegex}}),t("mDataProp_"+g,i),d.bFilter&&(t("sSearch_"+g,l.sSearch),t("bRegex_"+g,l.bRegex),t("bSearchable_"+g,m.bSearchable)),d.bSort&&t("bSortable_"+g,m.bSortable);d.bFilter&&(t("sSearch",e.sSearch),t("bRegex",
e.bRegex));d.bSort&&(h.each(k,function(a,b){G.order.push({column:b.col,dir:b.dir});t("iSortCol_"+a,b.col);t("sSortDir_"+a,b.dir)}),t("iSortingCols",k.length));b=n.ext.legacy.ajax;return null===b?a.sAjaxSource?j:G:b?j:G}function wb(a,b){var c=ta(a,b),d=b.sEcho!==k?b.sEcho:b.draw,e=b.iTotalRecords!==k?b.iTotalRecords:b.recordsTotal,f=b.iTotalDisplayRecords!==k?b.iTotalDisplayRecords:b.recordsFiltered;if(d){if(1*d<a.iDraw)return;a.iDraw=1*d}oa(a);a._iRecordsTotal=parseInt(e,10);a._iRecordsDisplay=parseInt(f,
10);d=0;for(e=c.length;d<e;d++)O(a,c[d]);a.aiDisplay=a.aiDisplayMaster.slice();a.bAjaxDataGet=!1;P(a);a._bInitComplete||ua(a,b);a.bAjaxDataGet=!0;C(a,!1)}function ta(a,b){var c=h.isPlainObject(a.ajax)&&a.ajax.dataSrc!==k?a.ajax.dataSrc:a.sAjaxDataProp;return"data"===c?b.aaData||b[c]:""!==c?S(c)(b):b}function qb(a){var b=a.oClasses,c=a.sTableId,d=a.oLanguage,e=a.oPreviousSearch,f=a.aanFeatures,g='<input type="search" class="'+b.sFilterInput+'"/>',j=d.sSearch,j=j.match(/_INPUT_/)?j.replace("_INPUT_",
g):j+g,b=h("<div/>",{id:!f.f?c+"_filter":null,"class":b.sFilter}).append(h("<label/>").append(j)),f=function(){var b=!this.value?"":this.value;b!=e.sSearch&&(ga(a,{sSearch:b,bRegex:e.bRegex,bSmart:e.bSmart,bCaseInsensitive:e.bCaseInsensitive}),a._iDisplayStart=0,P(a))},g=null!==a.searchDelay?a.searchDelay:"ssp"===y(a)?400:0,i=h("input",b).val(e.sSearch).attr("placeholder",d.sSearchPlaceholder).on("keyup.DT search.DT input.DT paste.DT cut.DT",g?Oa(f,g):f).on("keypress.DT",function(a){if(13==a.keyCode)return!1}).attr("aria-controls",
c);h(a.nTable).on("search.dt.DT",function(b,c){if(a===c)try{i[0]!==H.activeElement&&i.val(e.sSearch)}catch(d){}});return b[0]}function ga(a,b,c){var d=a.oPreviousSearch,e=a.aoPreSearchCols,f=function(a){d.sSearch=a.sSearch;d.bRegex=a.bRegex;d.bSmart=a.bSmart;d.bCaseInsensitive=a.bCaseInsensitive};Ga(a);if("ssp"!=y(a)){xb(a,b.sSearch,c,b.bEscapeRegex!==k?!b.bEscapeRegex:b.bRegex,b.bSmart,b.bCaseInsensitive);f(b);for(b=0;b<e.length;b++)yb(a,e[b].sSearch,b,e[b].bEscapeRegex!==k?!e[b].bEscapeRegex:e[b].bRegex,
e[b].bSmart,e[b].bCaseInsensitive);zb(a)}else f(b);a.bFiltered=!0;r(a,null,"search",[a])}function zb(a){for(var b=n.ext.search,c=a.aiDisplay,d,e,f=0,g=b.length;f<g;f++){for(var j=[],i=0,m=c.length;i<m;i++)e=c[i],d=a.aoData[e],b[f](a,d._aFilterData,e,d._aData,i)&&j.push(e);c.length=0;h.merge(c,j)}}function yb(a,b,c,d,e,f){if(""!==b){for(var g=[],j=a.aiDisplay,d=Pa(b,d,e,f),e=0;e<j.length;e++)b=a.aoData[j[e]]._aFilterData[c],d.test(b)&&g.push(j[e]);a.aiDisplay=g}}function xb(a,b,c,d,e,f){var d=Pa(b,
d,e,f),f=a.oPreviousSearch.sSearch,g=a.aiDisplayMaster,j,e=[];0!==n.ext.search.length&&(c=!0);j=Ab(a);if(0>=b.length)a.aiDisplay=g.slice();else{if(j||c||f.length>b.length||0!==b.indexOf(f)||a.bSorted)a.aiDisplay=g.slice();b=a.aiDisplay;for(c=0;c<b.length;c++)d.test(a.aoData[b[c]]._sFilterRow)&&e.push(b[c]);a.aiDisplay=e}}function Pa(a,b,c,d){a=b?a:Qa(a);c&&(a="^(?=.*?"+h.map(a.match(/"[^"]+"|[^ ]+/g)||[""],function(a){if('"'===a.charAt(0))var b=a.match(/^"(.*)"$/),a=b?b[1]:a;return a.replace('"',
"")}).join(")(?=.*?")+").*$");return RegExp(a,d?"i":"")}function Ab(a){var b=a.aoColumns,c,d,e,f,g,j,i,h,l=n.ext.type.search;c=!1;d=0;for(f=a.aoData.length;d<f;d++)if(h=a.aoData[d],!h._aFilterData){j=[];e=0;for(g=b.length;e<g;e++)c=b[e],c.bSearchable?(i=B(a,d,e,"filter"),l[c.sType]&&(i=l[c.sType](i)),null===i&&(i=""),"string"!==typeof i&&i.toString&&(i=i.toString())):i="",i.indexOf&&-1!==i.indexOf("&")&&(va.innerHTML=i,i=Wb?va.textContent:va.innerText),i.replace&&(i=i.replace(/[\r\n]/g,"")),j.push(i);
h._aFilterData=j;h._sFilterRow=j.join(" ");c=!0}return c}function Bb(a){return{search:a.sSearch,smart:a.bSmart,regex:a.bRegex,caseInsensitive:a.bCaseInsensitive}}function Cb(a){return{sSearch:a.search,bSmart:a.smart,bRegex:a.regex,bCaseInsensitive:a.caseInsensitive}}function tb(a){var b=a.sTableId,c=a.aanFeatures.i,d=h("<div/>",{"class":a.oClasses.sInfo,id:!c?b+"_info":null});c||(a.aoDrawCallback.push({fn:Db,sName:"information"}),d.attr("role","status").attr("aria-live","polite"),h(a.nTable).attr("aria-describedby",
b+"_info"));return d[0]}function Db(a){var b=a.aanFeatures.i;if(0!==b.length){var c=a.oLanguage,d=a._iDisplayStart+1,e=a.fnDisplayEnd(),f=a.fnRecordsTotal(),g=a.fnRecordsDisplay(),j=g?c.sInfo:c.sInfoEmpty;g!==f&&(j+=" "+c.sInfoFiltered);j+=c.sInfoPostFix;j=Eb(a,j);c=c.fnInfoCallback;null!==c&&(j=c.call(a.oInstance,a,d,e,f,g,j));h(b).html(j)}}function Eb(a,b){var c=a.fnFormatNumber,d=a._iDisplayStart+1,e=a._iDisplayLength,f=a.fnRecordsDisplay(),g=-1===e;return b.replace(/_START_/g,c.call(a,d)).replace(/_END_/g,
c.call(a,a.fnDisplayEnd())).replace(/_MAX_/g,c.call(a,a.fnRecordsTotal())).replace(/_TOTAL_/g,c.call(a,f)).replace(/_PAGE_/g,c.call(a,g?1:Math.ceil(d/e))).replace(/_PAGES_/g,c.call(a,g?1:Math.ceil(f/e)))}function ha(a){var b,c,d=a.iInitDisplayStart,e=a.aoColumns,f;c=a.oFeatures;var g=a.bDeferLoading;if(a.bInitialised){ob(a);lb(a);fa(a,a.aoHeader);fa(a,a.aoFooter);C(a,!0);c.bAutoWidth&&Fa(a);b=0;for(c=e.length;b<c;b++)f=e[b],f.sWidth&&(f.nTh.style.width=v(f.sWidth));r(a,null,"preInit",[a]);T(a);e=
y(a);if("ssp"!=e||g)"ajax"==e?sa(a,[],function(c){var f=ta(a,c);for(b=0;b<f.length;b++)O(a,f[b]);a.iInitDisplayStart=d;T(a);C(a,!1);ua(a,c)},a):(C(a,!1),ua(a))}else setTimeout(function(){ha(a)},200)}function ua(a,b){a._bInitComplete=!0;(b||a.oInit.aaData)&&$(a);r(a,null,"plugin-init",[a,b]);r(a,"aoInitComplete","init",[a,b])}function Ra(a,b){var c=parseInt(b,10);a._iDisplayLength=c;Sa(a);r(a,null,"length",[a,c])}function pb(a){for(var b=a.oClasses,c=a.sTableId,d=a.aLengthMenu,e=h.isArray(d[0]),f=
e?d[0]:d,d=e?d[1]:d,e=h("<select/>",{name:c+"_length","aria-controls":c,"class":b.sLengthSelect}),g=0,j=f.length;g<j;g++)e[0][g]=new Option("number"===typeof d[g]?a.fnFormatNumber(d[g]):d[g],f[g]);var i=h("<div><label/></div>").addClass(b.sLength);a.aanFeatures.l||(i[0].id=c+"_length");i.children().append(a.oLanguage.sLengthMenu.replace("_MENU_",e[0].outerHTML));h("select",i).val(a._iDisplayLength).on("change.DT",function(){Ra(a,h(this).val());P(a)});h(a.nTable).on("length.dt.DT",function(b,c,d){a===
c&&h("select",i).val(d)});return i[0]}function ub(a){var b=a.sPaginationType,c=n.ext.pager[b],d="function"===typeof c,e=function(a){P(a)},b=h("<div/>").addClass(a.oClasses.sPaging+b)[0],f=a.aanFeatures;d||c.fnInit(a,b,e);f.p||(b.id=a.sTableId+"_paginate",a.aoDrawCallback.push({fn:function(a){if(d){var b=a._iDisplayStart,i=a._iDisplayLength,h=a.fnRecordsDisplay(),l=-1===i,b=l?0:Math.ceil(b/i),i=l?1:Math.ceil(h/i),h=c(b,i),k,l=0;for(k=f.p.length;l<k;l++)Na(a,"pageButton")(a,f.p[l],l,h,b,i)}else c.fnUpdate(a,
e)},sName:"pagination"}));return b}function Ta(a,b,c){var d=a._iDisplayStart,e=a._iDisplayLength,f=a.fnRecordsDisplay();0===f||-1===e?d=0:"number"===typeof b?(d=b*e,d>f&&(d=0)):"first"==b?d=0:"previous"==b?(d=0<=e?d-e:0,0>d&&(d=0)):"next"==b?d+e<f&&(d+=e):"last"==b?d=Math.floor((f-1)/e)*e:K(a,0,"Unknown paging action: "+b,5);b=a._iDisplayStart!==d;a._iDisplayStart=d;b&&(r(a,null,"page",[a]),c&&P(a));return b}function rb(a){return h("<div/>",{id:!a.aanFeatures.r?a.sTableId+"_processing":null,"class":a.oClasses.sProcessing}).html(a.oLanguage.sProcessing).insertBefore(a.nTable)[0]}
function C(a,b){a.oFeatures.bProcessing&&h(a.aanFeatures.r).css("display",b?"block":"none");r(a,null,"processing",[a,b])}function sb(a){var b=h(a.nTable);b.attr("role","grid");var c=a.oScroll;if(""===c.sX&&""===c.sY)return a.nTable;var d=c.sX,e=c.sY,f=a.oClasses,g=b.children("caption"),j=g.length?g[0]._captionSide:null,i=h(b[0].cloneNode(!1)),m=h(b[0].cloneNode(!1)),l=b.children("tfoot");l.length||(l=null);i=h("<div/>",{"class":f.sScrollWrapper}).append(h("<div/>",{"class":f.sScrollHead}).css({overflow:"hidden",
position:"relative",border:0,width:d?!d?null:v(d):"100%"}).append(h("<div/>",{"class":f.sScrollHeadInner}).css({"box-sizing":"content-box",width:c.sXInner||"100%"}).append(i.removeAttr("id").css("margin-left",0).append("top"===j?g:null).append(b.children("thead"))))).append(h("<div/>",{"class":f.sScrollBody}).css({position:"relative",overflow:"auto",width:!d?null:v(d)}).append(b));l&&i.append(h("<div/>",{"class":f.sScrollFoot}).css({overflow:"hidden",border:0,width:d?!d?null:v(d):"100%"}).append(h("<div/>",
{"class":f.sScrollFootInner}).append(m.removeAttr("id").css("margin-left",0).append("bottom"===j?g:null).append(b.children("tfoot")))));var b=i.children(),k=b[0],f=b[1],t=l?b[2]:null;if(d)h(f).on("scroll.DT",function(){var a=this.scrollLeft;k.scrollLeft=a;l&&(t.scrollLeft=a)});h(f).css(e&&c.bCollapse?"max-height":"height",e);a.nScrollHead=k;a.nScrollBody=f;a.nScrollFoot=t;a.aoDrawCallback.push({fn:la,sName:"scrolling"});return i[0]}function la(a){var b=a.oScroll,c=b.sX,d=b.sXInner,e=b.sY,b=b.iBarWidth,
f=h(a.nScrollHead),g=f[0].style,j=f.children("div"),i=j[0].style,m=j.children("table"),j=a.nScrollBody,l=h(j),q=j.style,t=h(a.nScrollFoot).children("div"),n=t.children("table"),o=h(a.nTHead),p=h(a.nTable),s=p[0],r=s.style,u=a.nTFoot?h(a.nTFoot):null,x=a.oBrowser,U=x.bScrollOversize,Xb=D(a.aoColumns,"nTh"),Q,L,R,w,Ua=[],y=[],z=[],A=[],B,C=function(a){a=a.style;a.paddingTop="0";a.paddingBottom="0";a.borderTopWidth="0";a.borderBottomWidth="0";a.height=0};L=j.scrollHeight>j.clientHeight;if(a.scrollBarVis!==
L&&a.scrollBarVis!==k)a.scrollBarVis=L,$(a);else{a.scrollBarVis=L;p.children("thead, tfoot").remove();u&&(R=u.clone().prependTo(p),Q=u.find("tr"),R=R.find("tr"));w=o.clone().prependTo(p);o=o.find("tr");L=w.find("tr");w.find("th, td").removeAttr("tabindex");c||(q.width="100%",f[0].style.width="100%");h.each(ra(a,w),function(b,c){B=aa(a,b);c.style.width=a.aoColumns[B].sWidth});u&&I(function(a){a.style.width=""},R);f=p.outerWidth();if(""===c){r.width="100%";if(U&&(p.find("tbody").height()>j.offsetHeight||
"scroll"==l.css("overflow-y")))r.width=v(p.outerWidth()-b);f=p.outerWidth()}else""!==d&&(r.width=v(d),f=p.outerWidth());I(C,L);I(function(a){z.push(a.innerHTML);Ua.push(v(h(a).css("width")))},L);I(function(a,b){if(h.inArray(a,Xb)!==-1)a.style.width=Ua[b]},o);h(L).height(0);u&&(I(C,R),I(function(a){A.push(a.innerHTML);y.push(v(h(a).css("width")))},R),I(function(a,b){a.style.width=y[b]},Q),h(R).height(0));I(function(a,b){a.innerHTML='<div class="dataTables_sizing">'+z[b]+"</div>";a.childNodes[0].style.height=
"0";a.childNodes[0].style.overflow="hidden";a.style.width=Ua[b]},L);u&&I(function(a,b){a.innerHTML='<div class="dataTables_sizing">'+A[b]+"</div>";a.childNodes[0].style.height="0";a.childNodes[0].style.overflow="hidden";a.style.width=y[b]},R);if(p.outerWidth()<f){Q=j.scrollHeight>j.offsetHeight||"scroll"==l.css("overflow-y")?f+b:f;if(U&&(j.scrollHeight>j.offsetHeight||"scroll"==l.css("overflow-y")))r.width=v(Q-b);(""===c||""!==d)&&K(a,1,"Possible column misalignment",6)}else Q="100%";q.width=v(Q);
g.width=v(Q);u&&(a.nScrollFoot.style.width=v(Q));!e&&U&&(q.height=v(s.offsetHeight+b));c=p.outerWidth();m[0].style.width=v(c);i.width=v(c);d=p.height()>j.clientHeight||"scroll"==l.css("overflow-y");e="padding"+(x.bScrollbarLeft?"Left":"Right");i[e]=d?b+"px":"0px";u&&(n[0].style.width=v(c),t[0].style.width=v(c),t[0].style[e]=d?b+"px":"0px");p.children("colgroup").insertBefore(p.children("thead"));l.scroll();if((a.bSorted||a.bFiltered)&&!a._drawHold)j.scrollTop=0}}function I(a,b,c){for(var d=0,e=0,
f=b.length,g,j;e<f;){g=b[e].firstChild;for(j=c?c[e].firstChild:null;g;)1===g.nodeType&&(c?a(g,j,d):a(g,d),d++),g=g.nextSibling,j=c?j.nextSibling:null;e++}}function Fa(a){var b=a.nTable,c=a.aoColumns,d=a.oScroll,e=d.sY,f=d.sX,g=d.sXInner,j=c.length,i=ma(a,"bVisible"),m=h("th",a.nTHead),l=b.getAttribute("width"),k=b.parentNode,t=!1,n,o,p=a.oBrowser,d=p.bScrollOversize;(n=b.style.width)&&-1!==n.indexOf("%")&&(l=n);for(n=0;n<i.length;n++)o=c[i[n]],null!==o.sWidth&&(o.sWidth=Fb(o.sWidthOrig,k),t=!0);if(d||
!t&&!f&&!e&&j==V(a)&&j==m.length)for(n=0;n<j;n++)i=aa(a,n),null!==i&&(c[i].sWidth=v(m.eq(n).width()));else{j=h(b).clone().css("visibility","hidden").removeAttr("id");j.find("tbody tr").remove();var s=h("<tr/>").appendTo(j.find("tbody"));j.find("thead, tfoot").remove();j.append(h(a.nTHead).clone()).append(h(a.nTFoot).clone());j.find("tfoot th, tfoot td").css("width","");m=ra(a,j.find("thead")[0]);for(n=0;n<i.length;n++)o=c[i[n]],m[n].style.width=null!==o.sWidthOrig&&""!==o.sWidthOrig?v(o.sWidthOrig):
"",o.sWidthOrig&&f&&h(m[n]).append(h("<div/>").css({width:o.sWidthOrig,margin:0,padding:0,border:0,height:1}));if(a.aoData.length)for(n=0;n<i.length;n++)t=i[n],o=c[t],h(Gb(a,t)).clone(!1).append(o.sContentPadding).appendTo(s);h("[name]",j).removeAttr("name");o=h("<div/>").css(f||e?{position:"absolute",top:0,left:0,height:1,right:0,overflow:"hidden"}:{}).append(j).appendTo(k);f&&g?j.width(g):f?(j.css("width","auto"),j.removeAttr("width"),j.width()<k.clientWidth&&l&&j.width(k.clientWidth)):e?j.width(k.clientWidth):
l&&j.width(l);for(n=e=0;n<i.length;n++)k=h(m[n]),g=k.outerWidth()-k.width(),k=p.bBounding?Math.ceil(m[n].getBoundingClientRect().width):k.outerWidth(),e+=k,c[i[n]].sWidth=v(k-g);b.style.width=v(e);o.remove()}l&&(b.style.width=v(l));if((l||f)&&!a._reszEvt)b=function(){h(E).on("resize.DT-"+a.sInstance,Oa(function(){$(a)}))},d?setTimeout(b,1E3):b(),a._reszEvt=!0}function Fb(a,b){if(!a)return 0;var c=h("<div/>").css("width",v(a)).appendTo(b||H.body),d=c[0].offsetWidth;c.remove();return d}function Gb(a,
b){var c=Hb(a,b);if(0>c)return null;var d=a.aoData[c];return!d.nTr?h("<td/>").html(B(a,c,b,"display"))[0]:d.anCells[b]}function Hb(a,b){for(var c,d=-1,e=-1,f=0,g=a.aoData.length;f<g;f++)c=B(a,f,b,"display")+"",c=c.replace(Yb,""),c=c.replace(/&nbsp;/g," "),c.length>d&&(d=c.length,e=f);return e}function v(a){return null===a?"0px":"number"==typeof a?0>a?"0px":a+"px":a.match(/\d$/)?a+"px":a}function X(a){var b,c,d=[],e=a.aoColumns,f,g,j,i;b=a.aaSortingFixed;c=h.isPlainObject(b);var m=[];f=function(a){a.length&&
!h.isArray(a[0])?m.push(a):h.merge(m,a)};h.isArray(b)&&f(b);c&&b.pre&&f(b.pre);f(a.aaSorting);c&&b.post&&f(b.post);for(a=0;a<m.length;a++){i=m[a][0];f=e[i].aDataSort;b=0;for(c=f.length;b<c;b++)g=f[b],j=e[g].sType||"string",m[a]._idx===k&&(m[a]._idx=h.inArray(m[a][1],e[g].asSorting)),d.push({src:i,col:g,dir:m[a][1],index:m[a]._idx,type:j,formatter:n.ext.type.order[j+"-pre"]})}return d}function nb(a){var b,c,d=[],e=n.ext.type.order,f=a.aoData,g=0,j,i=a.aiDisplayMaster,h;Ga(a);h=X(a);b=0;for(c=h.length;b<
c;b++)j=h[b],j.formatter&&g++,Ib(a,j.col);if("ssp"!=y(a)&&0!==h.length){b=0;for(c=i.length;b<c;b++)d[i[b]]=b;g===h.length?i.sort(function(a,b){var c,e,g,j,i=h.length,k=f[a]._aSortData,n=f[b]._aSortData;for(g=0;g<i;g++)if(j=h[g],c=k[j.col],e=n[j.col],c=c<e?-1:c>e?1:0,0!==c)return"asc"===j.dir?c:-c;c=d[a];e=d[b];return c<e?-1:c>e?1:0}):i.sort(function(a,b){var c,g,j,i,k=h.length,n=f[a]._aSortData,o=f[b]._aSortData;for(j=0;j<k;j++)if(i=h[j],c=n[i.col],g=o[i.col],i=e[i.type+"-"+i.dir]||e["string-"+i.dir],
c=i(c,g),0!==c)return c;c=d[a];g=d[b];return c<g?-1:c>g?1:0})}a.bSorted=!0}function Jb(a){for(var b,c,d=a.aoColumns,e=X(a),a=a.oLanguage.oAria,f=0,g=d.length;f<g;f++){c=d[f];var j=c.asSorting;b=c.sTitle.replace(/<.*?>/g,"");var i=c.nTh;i.removeAttribute("aria-sort");c.bSortable&&(0<e.length&&e[0].col==f?(i.setAttribute("aria-sort","asc"==e[0].dir?"ascending":"descending"),c=j[e[0].index+1]||j[0]):c=j[0],b+="asc"===c?a.sSortAscending:a.sSortDescending);i.setAttribute("aria-label",b)}}function Va(a,
b,c,d){var e=a.aaSorting,f=a.aoColumns[b].asSorting,g=function(a,b){var c=a._idx;c===k&&(c=h.inArray(a[1],f));return c+1<f.length?c+1:b?null:0};"number"===typeof e[0]&&(e=a.aaSorting=[e]);c&&a.oFeatures.bSortMulti?(c=h.inArray(b,D(e,"0")),-1!==c?(b=g(e[c],!0),null===b&&1===e.length&&(b=0),null===b?e.splice(c,1):(e[c][1]=f[b],e[c]._idx=b)):(e.push([b,f[0],0]),e[e.length-1]._idx=0)):e.length&&e[0][0]==b?(b=g(e[0]),e.length=1,e[0][1]=f[b],e[0]._idx=b):(e.length=0,e.push([b,f[0]]),e[0]._idx=0);T(a);"function"==
typeof d&&d(a)}function Ma(a,b,c,d){var e=a.aoColumns[c];Wa(b,{},function(b){!1!==e.bSortable&&(a.oFeatures.bProcessing?(C(a,!0),setTimeout(function(){Va(a,c,b.shiftKey,d);"ssp"!==y(a)&&C(a,!1)},0)):Va(a,c,b.shiftKey,d))})}function wa(a){var b=a.aLastSort,c=a.oClasses.sSortColumn,d=X(a),e=a.oFeatures,f,g;if(e.bSort&&e.bSortClasses){e=0;for(f=b.length;e<f;e++)g=b[e].src,h(D(a.aoData,"anCells",g)).removeClass(c+(2>e?e+1:3));e=0;for(f=d.length;e<f;e++)g=d[e].src,h(D(a.aoData,"anCells",g)).addClass(c+
(2>e?e+1:3))}a.aLastSort=d}function Ib(a,b){var c=a.aoColumns[b],d=n.ext.order[c.sSortDataType],e;d&&(e=d.call(a.oInstance,a,b,ba(a,b)));for(var f,g=n.ext.type.order[c.sType+"-pre"],j=0,i=a.aoData.length;j<i;j++)if(c=a.aoData[j],c._aSortData||(c._aSortData=[]),!c._aSortData[b]||d)f=d?e[j]:B(a,j,b,"sort"),c._aSortData[b]=g?g(f):f}function xa(a){if(a.oFeatures.bStateSave&&!a.bDestroying){var b={time:+new Date,start:a._iDisplayStart,length:a._iDisplayLength,order:h.extend(!0,[],a.aaSorting),search:Bb(a.oPreviousSearch),
columns:h.map(a.aoColumns,function(b,d){return{visible:b.bVisible,search:Bb(a.aoPreSearchCols[d])}})};r(a,"aoStateSaveParams","stateSaveParams",[a,b]);a.oSavedState=b;a.fnStateSaveCallback.call(a.oInstance,a,b)}}function Kb(a,b,c){var d,e,f=a.aoColumns,b=function(b){if(b&&b.time){var g=r(a,"aoStateLoadParams","stateLoadParams",[a,b]);if(-1===h.inArray(!1,g)&&(g=a.iStateDuration,!(0<g&&b.time<+new Date-1E3*g)&&!(b.columns&&f.length!==b.columns.length))){a.oLoadedState=h.extend(!0,{},b);b.start!==k&&
(a._iDisplayStart=b.start,a.iInitDisplayStart=b.start);b.length!==k&&(a._iDisplayLength=b.length);b.order!==k&&(a.aaSorting=[],h.each(b.order,function(b,c){a.aaSorting.push(c[0]>=f.length?[0,c[1]]:c)}));b.search!==k&&h.extend(a.oPreviousSearch,Cb(b.search));if(b.columns){d=0;for(e=b.columns.length;d<e;d++)g=b.columns[d],g.visible!==k&&(f[d].bVisible=g.visible),g.search!==k&&h.extend(a.aoPreSearchCols[d],Cb(g.search))}r(a,"aoStateLoaded","stateLoaded",[a,b])}}c()};if(a.oFeatures.bStateSave){var g=
a.fnStateLoadCallback.call(a.oInstance,a,b);g!==k&&b(g)}else c()}function ya(a){var b=n.settings,a=h.inArray(a,D(b,"nTable"));return-1!==a?b[a]:null}function K(a,b,c,d){c="DataTables warning: "+(a?"table id="+a.sTableId+" - ":"")+c;d&&(c+=". For more information about this error, please see http://datatables.net/tn/"+d);if(b)E.console&&console.log&&console.log(c);else if(b=n.ext,b=b.sErrMode||b.errMode,a&&r(a,null,"error",[a,d,c]),"alert"==b)alert(c);else{if("throw"==b)throw Error(c);"function"==
typeof b&&b(a,d,c)}}function F(a,b,c,d){h.isArray(c)?h.each(c,function(c,d){h.isArray(d)?F(a,b,d[0],d[1]):F(a,b,d)}):(d===k&&(d=c),b[c]!==k&&(a[d]=b[c]))}function Xa(a,b,c){var d,e;for(e in b)b.hasOwnProperty(e)&&(d=b[e],h.isPlainObject(d)?(h.isPlainObject(a[e])||(a[e]={}),h.extend(!0,a[e],d)):a[e]=c&&"data"!==e&&"aaData"!==e&&h.isArray(d)?d.slice():d);return a}function Wa(a,b,c){h(a).on("click.DT",b,function(b){h(a).blur();c(b)}).on("keypress.DT",b,function(a){13===a.which&&(a.preventDefault(),c(a))}).on("selectstart.DT",
function(){return!1})}function z(a,b,c,d){c&&a[b].push({fn:c,sName:d})}function r(a,b,c,d){var e=[];b&&(e=h.map(a[b].slice().reverse(),function(b){return b.fn.apply(a.oInstance,d)}));null!==c&&(b=h.Event(c+".dt"),h(a.nTable).trigger(b,d),e.push(b.result));return e}function Sa(a){var b=a._iDisplayStart,c=a.fnDisplayEnd(),d=a._iDisplayLength;b>=c&&(b=c-d);b-=b%d;if(-1===d||0>b)b=0;a._iDisplayStart=b}function Na(a,b){var c=a.renderer,d=n.ext.renderer[b];return h.isPlainObject(c)&&c[b]?d[c[b]]||d._:"string"===
typeof c?d[c]||d._:d._}function y(a){return a.oFeatures.bServerSide?"ssp":a.ajax||a.sAjaxSource?"ajax":"dom"}function ia(a,b){var c=[],c=Lb.numbers_length,d=Math.floor(c/2);b<=c?c=Y(0,b):a<=d?(c=Y(0,c-2),c.push("ellipsis"),c.push(b-1)):(a>=b-1-d?c=Y(b-(c-2),b):(c=Y(a-d+2,a+d-1),c.push("ellipsis"),c.push(b-1)),c.splice(0,0,"ellipsis"),c.splice(0,0,0));c.DT_el="span";return c}function Da(a){h.each({num:function(b){return za(b,a)},"num-fmt":function(b){return za(b,a,Ya)},"html-num":function(b){return za(b,
a,Aa)},"html-num-fmt":function(b){return za(b,a,Aa,Ya)}},function(b,c){x.type.order[b+a+"-pre"]=c;b.match(/^html\-/)&&(x.type.search[b+a]=x.type.search.html)})}function Mb(a){return function(){var b=[ya(this[n.ext.iApiIndex])].concat(Array.prototype.slice.call(arguments));return n.ext.internal[a].apply(this,b)}}var n=function(a){this.$=function(a,b){return this.api(!0).$(a,b)};this._=function(a,b){return this.api(!0).rows(a,b).data()};this.api=function(a){return a?new s(ya(this[x.iApiIndex])):new s(this)};
this.fnAddData=function(a,b){var c=this.api(!0),d=h.isArray(a)&&(h.isArray(a[0])||h.isPlainObject(a[0]))?c.rows.add(a):c.row.add(a);(b===k||b)&&c.draw();return d.flatten().toArray()};this.fnAdjustColumnSizing=function(a){var b=this.api(!0).columns.adjust(),c=b.settings()[0],d=c.oScroll;a===k||a?b.draw(!1):(""!==d.sX||""!==d.sY)&&la(c)};this.fnClearTable=function(a){var b=this.api(!0).clear();(a===k||a)&&b.draw()};this.fnClose=function(a){this.api(!0).row(a).child.hide()};this.fnDeleteRow=function(a,
b,c){var d=this.api(!0),a=d.rows(a),e=a.settings()[0],h=e.aoData[a[0][0]];a.remove();b&&b.call(this,e,h);(c===k||c)&&d.draw();return h};this.fnDestroy=function(a){this.api(!0).destroy(a)};this.fnDraw=function(a){this.api(!0).draw(a)};this.fnFilter=function(a,b,c,d,e,h){e=this.api(!0);null===b||b===k?e.search(a,c,d,h):e.column(b).search(a,c,d,h);e.draw()};this.fnGetData=function(a,b){var c=this.api(!0);if(a!==k){var d=a.nodeName?a.nodeName.toLowerCase():"";return b!==k||"td"==d||"th"==d?c.cell(a,b).data():
c.row(a).data()||null}return c.data().toArray()};this.fnGetNodes=function(a){var b=this.api(!0);return a!==k?b.row(a).node():b.rows().nodes().flatten().toArray()};this.fnGetPosition=function(a){var b=this.api(!0),c=a.nodeName.toUpperCase();return"TR"==c?b.row(a).index():"TD"==c||"TH"==c?(a=b.cell(a).index(),[a.row,a.columnVisible,a.column]):null};this.fnIsOpen=function(a){return this.api(!0).row(a).child.isShown()};this.fnOpen=function(a,b,c){return this.api(!0).row(a).child(b,c).show().child()[0]};
this.fnPageChange=function(a,b){var c=this.api(!0).page(a);(b===k||b)&&c.draw(!1)};this.fnSetColumnVis=function(a,b,c){a=this.api(!0).column(a).visible(b);(c===k||c)&&a.columns.adjust().draw()};this.fnSettings=function(){return ya(this[x.iApiIndex])};this.fnSort=function(a){this.api(!0).order(a).draw()};this.fnSortListener=function(a,b,c){this.api(!0).order.listener(a,b,c)};this.fnUpdate=function(a,b,c,d,e){var h=this.api(!0);c===k||null===c?h.row(b).data(a):h.cell(b,c).data(a);(e===k||e)&&h.columns.adjust();
(d===k||d)&&h.draw();return 0};this.fnVersionCheck=x.fnVersionCheck;var b=this,c=a===k,d=this.length;c&&(a={});this.oApi=this.internal=x.internal;for(var e in n.ext.internal)e&&(this[e]=Mb(e));this.each(function(){var e={},g=1<d?Xa(e,a,!0):a,j=0,i,e=this.getAttribute("id"),m=!1,l=n.defaults,q=h(this);if("table"!=this.nodeName.toLowerCase())K(null,0,"Non-table node initialisation ("+this.nodeName+")",2);else{fb(l);gb(l.column);J(l,l,!0);J(l.column,l.column,!0);J(l,h.extend(g,q.data()));var t=n.settings,
j=0;for(i=t.length;j<i;j++){var o=t[j];if(o.nTable==this||o.nTHead&&o.nTHead.parentNode==this||o.nTFoot&&o.nTFoot.parentNode==this){var s=g.bRetrieve!==k?g.bRetrieve:l.bRetrieve;if(c||s)return o.oInstance;if(g.bDestroy!==k?g.bDestroy:l.bDestroy){o.oInstance.fnDestroy();break}else{K(o,0,"Cannot reinitialise DataTable",3);return}}if(o.sTableId==this.id){t.splice(j,1);break}}if(null===e||""===e)this.id=e="DataTables_Table_"+n.ext._unique++;var p=h.extend(!0,{},n.models.oSettings,{sDestroyWidth:q[0].style.width,
sInstance:e,sTableId:e});p.nTable=this;p.oApi=b.internal;p.oInit=g;t.push(p);p.oInstance=1===b.length?b:q.dataTable();fb(g);Ca(g.oLanguage);g.aLengthMenu&&!g.iDisplayLength&&(g.iDisplayLength=h.isArray(g.aLengthMenu[0])?g.aLengthMenu[0][0]:g.aLengthMenu[0]);g=Xa(h.extend(!0,{},l),g);F(p.oFeatures,g,"bPaginate bLengthChange bFilter bSort bSortMulti bInfo bProcessing bAutoWidth bSortClasses bServerSide bDeferRender".split(" "));F(p,g,["asStripeClasses","ajax","fnServerData","fnFormatNumber","sServerMethod",
"aaSorting","aaSortingFixed","aLengthMenu","sPaginationType","sAjaxSource","sAjaxDataProp","iStateDuration","sDom","bSortCellsTop","iTabIndex","fnStateLoadCallback","fnStateSaveCallback","renderer","searchDelay","rowId",["iCookieDuration","iStateDuration"],["oSearch","oPreviousSearch"],["aoSearchCols","aoPreSearchCols"],["iDisplayLength","_iDisplayLength"]]);F(p.oScroll,g,[["sScrollX","sX"],["sScrollXInner","sXInner"],["sScrollY","sY"],["bScrollCollapse","bCollapse"]]);F(p.oLanguage,g,"fnInfoCallback");
z(p,"aoDrawCallback",g.fnDrawCallback,"user");z(p,"aoServerParams",g.fnServerParams,"user");z(p,"aoStateSaveParams",g.fnStateSaveParams,"user");z(p,"aoStateLoadParams",g.fnStateLoadParams,"user");z(p,"aoStateLoaded",g.fnStateLoaded,"user");z(p,"aoRowCallback",g.fnRowCallback,"user");z(p,"aoRowCreatedCallback",g.fnCreatedRow,"user");z(p,"aoHeaderCallback",g.fnHeaderCallback,"user");z(p,"aoFooterCallback",g.fnFooterCallback,"user");z(p,"aoInitComplete",g.fnInitComplete,"user");z(p,"aoPreDrawCallback",
g.fnPreDrawCallback,"user");p.rowIdFn=S(g.rowId);hb(p);var u=p.oClasses;h.extend(u,n.ext.classes,g.oClasses);q.addClass(u.sTable);p.iInitDisplayStart===k&&(p.iInitDisplayStart=g.iDisplayStart,p._iDisplayStart=g.iDisplayStart);null!==g.iDeferLoading&&(p.bDeferLoading=!0,e=h.isArray(g.iDeferLoading),p._iRecordsDisplay=e?g.iDeferLoading[0]:g.iDeferLoading,p._iRecordsTotal=e?g.iDeferLoading[1]:g.iDeferLoading);var v=p.oLanguage;h.extend(!0,v,g.oLanguage);v.sUrl&&(h.ajax({dataType:"json",url:v.sUrl,success:function(a){Ca(a);
J(l.oLanguage,a);h.extend(true,v,a);ha(p)},error:function(){ha(p)}}),m=!0);null===g.asStripeClasses&&(p.asStripeClasses=[u.sStripeOdd,u.sStripeEven]);var e=p.asStripeClasses,x=q.children("tbody").find("tr").eq(0);-1!==h.inArray(!0,h.map(e,function(a){return x.hasClass(a)}))&&(h("tbody tr",this).removeClass(e.join(" ")),p.asDestroyStripes=e.slice());e=[];t=this.getElementsByTagName("thead");0!==t.length&&(ea(p.aoHeader,t[0]),e=ra(p));if(null===g.aoColumns){t=[];j=0;for(i=e.length;j<i;j++)t.push(null)}else t=
g.aoColumns;j=0;for(i=t.length;j<i;j++)Ea(p,e?e[j]:null);jb(p,g.aoColumnDefs,t,function(a,b){ka(p,a,b)});if(x.length){var w=function(a,b){return a.getAttribute("data-"+b)!==null?b:null};h(x[0]).children("th, td").each(function(a,b){var c=p.aoColumns[a];if(c.mData===a){var d=w(b,"sort")||w(b,"order"),e=w(b,"filter")||w(b,"search");if(d!==null||e!==null){c.mData={_:a+".display",sort:d!==null?a+".@data-"+d:k,type:d!==null?a+".@data-"+d:k,filter:e!==null?a+".@data-"+e:k};ka(p,a)}}})}var U=p.oFeatures,
e=function(){if(g.aaSorting===k){var a=p.aaSorting;j=0;for(i=a.length;j<i;j++)a[j][1]=p.aoColumns[j].asSorting[0]}wa(p);U.bSort&&z(p,"aoDrawCallback",function(){if(p.bSorted){var a=X(p),b={};h.each(a,function(a,c){b[c.src]=c.dir});r(p,null,"order",[p,a,b]);Jb(p)}});z(p,"aoDrawCallback",function(){(p.bSorted||y(p)==="ssp"||U.bDeferRender)&&wa(p)},"sc");var a=q.children("caption").each(function(){this._captionSide=h(this).css("caption-side")}),b=q.children("thead");b.length===0&&(b=h("<thead/>").appendTo(q));
p.nTHead=b[0];b=q.children("tbody");b.length===0&&(b=h("<tbody/>").appendTo(q));p.nTBody=b[0];b=q.children("tfoot");if(b.length===0&&a.length>0&&(p.oScroll.sX!==""||p.oScroll.sY!==""))b=h("<tfoot/>").appendTo(q);if(b.length===0||b.children().length===0)q.addClass(u.sNoFooter);else if(b.length>0){p.nTFoot=b[0];ea(p.aoFooter,p.nTFoot)}if(g.aaData)for(j=0;j<g.aaData.length;j++)O(p,g.aaData[j]);else(p.bDeferLoading||y(p)=="dom")&&na(p,h(p.nTBody).children("tr"));p.aiDisplay=p.aiDisplayMaster.slice();
p.bInitialised=true;m===false&&ha(p)};g.bStateSave?(U.bStateSave=!0,z(p,"aoDrawCallback",xa,"state_save"),Kb(p,g,e)):e()}});b=null;return this},x,s,o,u,Za={},Nb=/[\r\n]/g,Aa=/<.*?>/g,Zb=/^\d{2,4}[\.\/\-]\d{1,2}[\.\/\-]\d{1,2}([T ]{1}\d{1,2}[:\.]\d{2}([\.:]\d{2})?)?$/,$b=RegExp("(\\/|\\.|\\*|\\+|\\?|\\||\\(|\\)|\\[|\\]|\\{|\\}|\\\\|\\$|\\^|\\-)","g"),Ya=/[',$£€¥%\u2009\u202F\u20BD\u20a9\u20BArfkɃΞ]/gi,M=function(a){return!a||!0===a||"-"===a?!0:!1},Ob=function(a){var b=parseInt(a,10);return!isNaN(b)&&
isFinite(a)?b:null},Pb=function(a,b){Za[b]||(Za[b]=RegExp(Qa(b),"g"));return"string"===typeof a&&"."!==b?a.replace(/\./g,"").replace(Za[b],"."):a},$a=function(a,b,c){var d="string"===typeof a;if(M(a))return!0;b&&d&&(a=Pb(a,b));c&&d&&(a=a.replace(Ya,""));return!isNaN(parseFloat(a))&&isFinite(a)},Qb=function(a,b,c){return M(a)?!0:!(M(a)||"string"===typeof a)?null:$a(a.replace(Aa,""),b,c)?!0:null},D=function(a,b,c){var d=[],e=0,f=a.length;if(c!==k)for(;e<f;e++)a[e]&&a[e][b]&&d.push(a[e][b][c]);else for(;e<
f;e++)a[e]&&d.push(a[e][b]);return d},ja=function(a,b,c,d){var e=[],f=0,g=b.length;if(d!==k)for(;f<g;f++)a[b[f]][c]&&e.push(a[b[f]][c][d]);else for(;f<g;f++)e.push(a[b[f]][c]);return e},Y=function(a,b){var c=[],d;b===k?(b=0,d=a):(d=b,b=a);for(var e=b;e<d;e++)c.push(e);return c},Rb=function(a){for(var b=[],c=0,d=a.length;c<d;c++)a[c]&&b.push(a[c]);return b},qa=function(a){var b;a:{if(!(2>a.length)){b=a.slice().sort();for(var c=b[0],d=1,e=b.length;d<e;d++){if(b[d]===c){b=!1;break a}c=b[d]}}b=!0}if(b)return a.slice();
b=[];var e=a.length,f,g=0,d=0;a:for(;d<e;d++){c=a[d];for(f=0;f<g;f++)if(b[f]===c)continue a;b.push(c);g++}return b};n.util={throttle:function(a,b){var c=b!==k?b:200,d,e;return function(){var b=this,g=+new Date,j=arguments;d&&g<d+c?(clearTimeout(e),e=setTimeout(function(){d=k;a.apply(b,j)},c)):(d=g,a.apply(b,j))}},escapeRegex:function(a){return a.replace($b,"\\$1")}};var A=function(a,b,c){a[b]!==k&&(a[c]=a[b])},ca=/\[.*?\]$/,W=/\(\)$/,Qa=n.util.escapeRegex,va=h("<div>")[0],Wb=va.textContent!==k,Yb=
/<.*?>/g,Oa=n.util.throttle,Sb=[],w=Array.prototype,ac=function(a){var b,c,d=n.settings,e=h.map(d,function(a){return a.nTable});if(a){if(a.nTable&&a.oApi)return[a];if(a.nodeName&&"table"===a.nodeName.toLowerCase())return b=h.inArray(a,e),-1!==b?[d[b]]:null;if(a&&"function"===typeof a.settings)return a.settings().toArray();"string"===typeof a?c=h(a):a instanceof h&&(c=a)}else return[];if(c)return c.map(function(){b=h.inArray(this,e);return-1!==b?d[b]:null}).toArray()};s=function(a,b){if(!(this instanceof
s))return new s(a,b);var c=[],d=function(a){(a=ac(a))&&(c=c.concat(a))};if(h.isArray(a))for(var e=0,f=a.length;e<f;e++)d(a[e]);else d(a);this.context=qa(c);b&&h.merge(this,b);this.selector={rows:null,cols:null,opts:null};s.extend(this,this,Sb)};n.Api=s;h.extend(s.prototype,{any:function(){return 0!==this.count()},concat:w.concat,context:[],count:function(){return this.flatten().length},each:function(a){for(var b=0,c=this.length;b<c;b++)a.call(this,this[b],b,this);return this},eq:function(a){var b=
this.context;return b.length>a?new s(b[a],this[a]):null},filter:function(a){var b=[];if(w.filter)b=w.filter.call(this,a,this);else for(var c=0,d=this.length;c<d;c++)a.call(this,this[c],c,this)&&b.push(this[c]);return new s(this.context,b)},flatten:function(){var a=[];return new s(this.context,a.concat.apply(a,this.toArray()))},join:w.join,indexOf:w.indexOf||function(a,b){for(var c=b||0,d=this.length;c<d;c++)if(this[c]===a)return c;return-1},iterator:function(a,b,c,d){var e=[],f,g,j,h,m,l=this.context,
n,o,u=this.selector;"string"===typeof a&&(d=c,c=b,b=a,a=!1);g=0;for(j=l.length;g<j;g++){var r=new s(l[g]);if("table"===b)f=c.call(r,l[g],g),f!==k&&e.push(f);else if("columns"===b||"rows"===b)f=c.call(r,l[g],this[g],g),f!==k&&e.push(f);else if("column"===b||"column-rows"===b||"row"===b||"cell"===b){o=this[g];"column-rows"===b&&(n=Ba(l[g],u.opts));h=0;for(m=o.length;h<m;h++)f=o[h],f="cell"===b?c.call(r,l[g],f.row,f.column,g,h):c.call(r,l[g],f,g,h,n),f!==k&&e.push(f)}}return e.length||d?(a=new s(l,a?
e.concat.apply([],e):e),b=a.selector,b.rows=u.rows,b.cols=u.cols,b.opts=u.opts,a):this},lastIndexOf:w.lastIndexOf||function(a,b){return this.indexOf.apply(this.toArray.reverse(),arguments)},length:0,map:function(a){var b=[];if(w.map)b=w.map.call(this,a,this);else for(var c=0,d=this.length;c<d;c++)b.push(a.call(this,this[c],c));return new s(this.context,b)},pluck:function(a){return this.map(function(b){return b[a]})},pop:w.pop,push:w.push,reduce:w.reduce||function(a,b){return ib(this,a,b,0,this.length,
1)},reduceRight:w.reduceRight||function(a,b){return ib(this,a,b,this.length-1,-1,-1)},reverse:w.reverse,selector:null,shift:w.shift,slice:function(){return new s(this.context,this)},sort:w.sort,splice:w.splice,toArray:function(){return w.slice.call(this)},to$:function(){return h(this)},toJQuery:function(){return h(this)},unique:function(){return new s(this.context,qa(this))},unshift:w.unshift});s.extend=function(a,b,c){if(c.length&&b&&(b instanceof s||b.__dt_wrapper)){var d,e,f,g=function(a,b,c){return function(){var d=
b.apply(a,arguments);s.extend(d,d,c.methodExt);return d}};d=0;for(e=c.length;d<e;d++)f=c[d],b[f.name]="function"===typeof f.val?g(a,f.val,f):h.isPlainObject(f.val)?{}:f.val,b[f.name].__dt_wrapper=!0,s.extend(a,b[f.name],f.propExt)}};s.register=o=function(a,b){if(h.isArray(a))for(var c=0,d=a.length;c<d;c++)s.register(a[c],b);else for(var e=a.split("."),f=Sb,g,j,c=0,d=e.length;c<d;c++){g=(j=-1!==e[c].indexOf("()"))?e[c].replace("()",""):e[c];var i;a:{i=0;for(var m=f.length;i<m;i++)if(f[i].name===g){i=
f[i];break a}i=null}i||(i={name:g,val:{},methodExt:[],propExt:[]},f.push(i));c===d-1?i.val=b:f=j?i.methodExt:i.propExt}};s.registerPlural=u=function(a,b,c){s.register(a,c);s.register(b,function(){var a=c.apply(this,arguments);return a===this?this:a instanceof s?a.length?h.isArray(a[0])?new s(a.context,a[0]):a[0]:k:a})};o("tables()",function(a){var b;if(a){b=s;var c=this.context;if("number"===typeof a)a=[c[a]];else var d=h.map(c,function(a){return a.nTable}),a=h(d).filter(a).map(function(){var a=h.inArray(this,
d);return c[a]}).toArray();b=new b(a)}else b=this;return b});o("table()",function(a){var a=this.tables(a),b=a.context;return b.length?new s(b[0]):a});u("tables().nodes()","table().node()",function(){return this.iterator("table",function(a){return a.nTable},1)});u("tables().body()","table().body()",function(){return this.iterator("table",function(a){return a.nTBody},1)});u("tables().header()","table().header()",function(){return this.iterator("table",function(a){return a.nTHead},1)});u("tables().footer()",
"table().footer()",function(){return this.iterator("table",function(a){return a.nTFoot},1)});u("tables().containers()","table().container()",function(){return this.iterator("table",function(a){return a.nTableWrapper},1)});o("draw()",function(a){return this.iterator("table",function(b){"page"===a?P(b):("string"===typeof a&&(a="full-hold"===a?!1:!0),T(b,!1===a))})});o("page()",function(a){return a===k?this.page.info().page:this.iterator("table",function(b){Ta(b,a)})});o("page.info()",function(){if(0===
this.context.length)return k;var a=this.context[0],b=a._iDisplayStart,c=a.oFeatures.bPaginate?a._iDisplayLength:-1,d=a.fnRecordsDisplay(),e=-1===c;return{page:e?0:Math.floor(b/c),pages:e?1:Math.ceil(d/c),start:b,end:a.fnDisplayEnd(),length:c,recordsTotal:a.fnRecordsTotal(),recordsDisplay:d,serverSide:"ssp"===y(a)}});o("page.len()",function(a){return a===k?0!==this.context.length?this.context[0]._iDisplayLength:k:this.iterator("table",function(b){Ra(b,a)})});var Tb=function(a,b,c){if(c){var d=new s(a);
d.one("draw",function(){c(d.ajax.json())})}if("ssp"==y(a))T(a,b);else{C(a,!0);var e=a.jqXHR;e&&4!==e.readyState&&e.abort();sa(a,[],function(c){oa(a);for(var c=ta(a,c),d=0,e=c.length;d<e;d++)O(a,c[d]);T(a,b);C(a,!1)})}};o("ajax.json()",function(){var a=this.context;if(0<a.length)return a[0].json});o("ajax.params()",function(){var a=this.context;if(0<a.length)return a[0].oAjaxData});o("ajax.reload()",function(a,b){return this.iterator("table",function(c){Tb(c,!1===b,a)})});o("ajax.url()",function(a){var b=
this.context;if(a===k){if(0===b.length)return k;b=b[0];return b.ajax?h.isPlainObject(b.ajax)?b.ajax.url:b.ajax:b.sAjaxSource}return this.iterator("table",function(b){h.isPlainObject(b.ajax)?b.ajax.url=a:b.ajax=a})});o("ajax.url().load()",function(a,b){return this.iterator("table",function(c){Tb(c,!1===b,a)})});var ab=function(a,b,c,d,e){var f=[],g,j,i,m,l,n;i=typeof b;if(!b||"string"===i||"function"===i||b.length===k)b=[b];i=0;for(m=b.length;i<m;i++){j=b[i]&&b[i].split&&!b[i].match(/[\[\(:]/)?b[i].split(","):
[b[i]];l=0;for(n=j.length;l<n;l++)(g=c("string"===typeof j[l]?h.trim(j[l]):j[l]))&&g.length&&(f=f.concat(g))}a=x.selector[a];if(a.length){i=0;for(m=a.length;i<m;i++)f=a[i](d,e,f)}return qa(f)},bb=function(a){a||(a={});a.filter&&a.search===k&&(a.search=a.filter);return h.extend({search:"none",order:"current",page:"all"},a)},cb=function(a){for(var b=0,c=a.length;b<c;b++)if(0<a[b].length)return a[0]=a[b],a[0].length=1,a.length=1,a.context=[a.context[b]],a;a.length=0;return a},Ba=function(a,b){var c,
d,e,f=[],g=a.aiDisplay;e=a.aiDisplayMaster;var j=b.search;c=b.order;d=b.page;if("ssp"==y(a))return"removed"===j?[]:Y(0,e.length);if("current"==d){c=a._iDisplayStart;for(d=a.fnDisplayEnd();c<d;c++)f.push(g[c])}else if("current"==c||"applied"==c)if("none"==j)f=e.slice();else if("applied"==j)f=g.slice();else{if("removed"==j){var i={};c=0;for(d=g.length;c<d;c++)i[g[c]]=null;f=h.map(e,function(a){return!i.hasOwnProperty(a)?a:null})}}else if("index"==c||"original"==c){c=0;for(d=a.aoData.length;c<d;c++)"none"==
j?f.push(c):(e=h.inArray(c,g),(-1===e&&"removed"==j||0<=e&&"applied"==j)&&f.push(c))}return f};o("rows()",function(a,b){a===k?a="":h.isPlainObject(a)&&(b=a,a="");var b=bb(b),c=this.iterator("table",function(c){var e=b,f;return ab("row",a,function(a){var b=Ob(a),i=c.aoData;if(b!==null&&!e)return[b];f||(f=Ba(c,e));if(b!==null&&h.inArray(b,f)!==-1)return[b];if(a===null||a===k||a==="")return f;if(typeof a==="function")return h.map(f,function(b){var c=i[b];return a(b,c._aData,c.nTr)?b:null});if(a.nodeName){var b=
a._DT_RowIndex,m=a._DT_CellIndex;if(b!==k)return i[b]&&i[b].nTr===a?[b]:[];if(m)return i[m.row]&&i[m.row].nTr===a?[m.row]:[];b=h(a).closest("*[data-dt-row]");return b.length?[b.data("dt-row")]:[]}if(typeof a==="string"&&a.charAt(0)==="#"){b=c.aIds[a.replace(/^#/,"")];if(b!==k)return[b.idx]}b=Rb(ja(c.aoData,f,"nTr"));return h(b).filter(a).map(function(){return this._DT_RowIndex}).toArray()},c,e)},1);c.selector.rows=a;c.selector.opts=b;return c});o("rows().nodes()",function(){return this.iterator("row",
function(a,b){return a.aoData[b].nTr||k},1)});o("rows().data()",function(){return this.iterator(!0,"rows",function(a,b){return ja(a.aoData,b,"_aData")},1)});u("rows().cache()","row().cache()",function(a){return this.iterator("row",function(b,c){var d=b.aoData[c];return"search"===a?d._aFilterData:d._aSortData},1)});u("rows().invalidate()","row().invalidate()",function(a){return this.iterator("row",function(b,c){da(b,c,a)})});u("rows().indexes()","row().index()",function(){return this.iterator("row",
function(a,b){return b},1)});u("rows().ids()","row().id()",function(a){for(var b=[],c=this.context,d=0,e=c.length;d<e;d++)for(var f=0,g=this[d].length;f<g;f++){var h=c[d].rowIdFn(c[d].aoData[this[d][f]]._aData);b.push((!0===a?"#":"")+h)}return new s(c,b)});u("rows().remove()","row().remove()",function(){var a=this;this.iterator("row",function(b,c,d){var e=b.aoData,f=e[c],g,h,i,m,l;e.splice(c,1);g=0;for(h=e.length;g<h;g++)if(i=e[g],l=i.anCells,null!==i.nTr&&(i.nTr._DT_RowIndex=g),null!==l){i=0;for(m=
l.length;i<m;i++)l[i]._DT_CellIndex.row=g}pa(b.aiDisplayMaster,c);pa(b.aiDisplay,c);pa(a[d],c,!1);0<b._iRecordsDisplay&&b._iRecordsDisplay--;Sa(b);c=b.rowIdFn(f._aData);c!==k&&delete b.aIds[c]});this.iterator("table",function(a){for(var c=0,d=a.aoData.length;c<d;c++)a.aoData[c].idx=c});return this});o("rows.add()",function(a){var b=this.iterator("table",function(b){var c,f,g,h=[];f=0;for(g=a.length;f<g;f++)c=a[f],c.nodeName&&"TR"===c.nodeName.toUpperCase()?h.push(na(b,c)[0]):h.push(O(b,c));return h},
1),c=this.rows(-1);c.pop();h.merge(c,b);return c});o("row()",function(a,b){return cb(this.rows(a,b))});o("row().data()",function(a){var b=this.context;if(a===k)return b.length&&this.length?b[0].aoData[this[0]]._aData:k;var c=b[0].aoData[this[0]];c._aData=a;h.isArray(a)&&c.nTr.id&&N(b[0].rowId)(a,c.nTr.id);da(b[0],this[0],"data");return this});o("row().node()",function(){var a=this.context;return a.length&&this.length?a[0].aoData[this[0]].nTr||null:null});o("row.add()",function(a){a instanceof h&&
a.length&&(a=a[0]);var b=this.iterator("table",function(b){return a.nodeName&&"TR"===a.nodeName.toUpperCase()?na(b,a)[0]:O(b,a)});return this.row(b[0])});var db=function(a,b){var c=a.context;if(c.length&&(c=c[0].aoData[b!==k?b:a[0]])&&c._details)c._details.remove(),c._detailsShow=k,c._details=k},Ub=function(a,b){var c=a.context;if(c.length&&a.length){var d=c[0].aoData[a[0]];if(d._details){(d._detailsShow=b)?d._details.insertAfter(d.nTr):d._details.detach();var e=c[0],f=new s(e),g=e.aoData;f.off("draw.dt.DT_details column-visibility.dt.DT_details destroy.dt.DT_details");
0<D(g,"_details").length&&(f.on("draw.dt.DT_details",function(a,b){e===b&&f.rows({page:"current"}).eq(0).each(function(a){a=g[a];a._detailsShow&&a._details.insertAfter(a.nTr)})}),f.on("column-visibility.dt.DT_details",function(a,b){if(e===b)for(var c,d=V(b),f=0,h=g.length;f<h;f++)c=g[f],c._details&&c._details.children("td[colspan]").attr("colspan",d)}),f.on("destroy.dt.DT_details",function(a,b){if(e===b)for(var c=0,d=g.length;c<d;c++)g[c]._details&&db(f,c)}))}}};o("row().child()",function(a,b){var c=
this.context;if(a===k)return c.length&&this.length?c[0].aoData[this[0]]._details:k;if(!0===a)this.child.show();else if(!1===a)db(this);else if(c.length&&this.length){var d=c[0],c=c[0].aoData[this[0]],e=[],f=function(a,b){if(h.isArray(a)||a instanceof h)for(var c=0,k=a.length;c<k;c++)f(a[c],b);else a.nodeName&&"tr"===a.nodeName.toLowerCase()?e.push(a):(c=h("<tr><td/></tr>").addClass(b),h("td",c).addClass(b).html(a)[0].colSpan=V(d),e.push(c[0]))};f(a,b);c._details&&c._details.detach();c._details=h(e);
c._detailsShow&&c._details.insertAfter(c.nTr)}return this});o(["row().child.show()","row().child().show()"],function(){Ub(this,!0);return this});o(["row().child.hide()","row().child().hide()"],function(){Ub(this,!1);return this});o(["row().child.remove()","row().child().remove()"],function(){db(this);return this});o("row().child.isShown()",function(){var a=this.context;return a.length&&this.length?a[0].aoData[this[0]]._detailsShow||!1:!1});var bc=/^([^:]+):(name|visIdx|visible)$/,Vb=function(a,b,
c,d,e){for(var c=[],d=0,f=e.length;d<f;d++)c.push(B(a,e[d],b));return c};o("columns()",function(a,b){a===k?a="":h.isPlainObject(a)&&(b=a,a="");var b=bb(b),c=this.iterator("table",function(c){var e=a,f=b,g=c.aoColumns,j=D(g,"sName"),i=D(g,"nTh");return ab("column",e,function(a){var b=Ob(a);if(a==="")return Y(g.length);if(b!==null)return[b>=0?b:g.length+b];if(typeof a==="function"){var e=Ba(c,f);return h.map(g,function(b,f){return a(f,Vb(c,f,0,0,e),i[f])?f:null})}var k=typeof a==="string"?a.match(bc):
"";if(k)switch(k[2]){case "visIdx":case "visible":b=parseInt(k[1],10);if(b<0){var n=h.map(g,function(a,b){return a.bVisible?b:null});return[n[n.length+b]]}return[aa(c,b)];case "name":return h.map(j,function(a,b){return a===k[1]?b:null});default:return[]}if(a.nodeName&&a._DT_CellIndex)return[a._DT_CellIndex.column];b=h(i).filter(a).map(function(){return h.inArray(this,i)}).toArray();if(b.length||!a.nodeName)return b;b=h(a).closest("*[data-dt-column]");return b.length?[b.data("dt-column")]:[]},c,f)},
1);c.selector.cols=a;c.selector.opts=b;return c});u("columns().header()","column().header()",function(){return this.iterator("column",function(a,b){return a.aoColumns[b].nTh},1)});u("columns().footer()","column().footer()",function(){return this.iterator("column",function(a,b){return a.aoColumns[b].nTf},1)});u("columns().data()","column().data()",function(){return this.iterator("column-rows",Vb,1)});u("columns().dataSrc()","column().dataSrc()",function(){return this.iterator("column",function(a,b){return a.aoColumns[b].mData},
1)});u("columns().cache()","column().cache()",function(a){return this.iterator("column-rows",function(b,c,d,e,f){return ja(b.aoData,f,"search"===a?"_aFilterData":"_aSortData",c)},1)});u("columns().nodes()","column().nodes()",function(){return this.iterator("column-rows",function(a,b,c,d,e){return ja(a.aoData,e,"anCells",b)},1)});u("columns().visible()","column().visible()",function(a,b){var c=this.iterator("column",function(b,c){if(a===k)return b.aoColumns[c].bVisible;var f=b.aoColumns,g=f[c],j=b.aoData,
i,m,l;if(a!==k&&g.bVisible!==a){if(a){var n=h.inArray(!0,D(f,"bVisible"),c+1);i=0;for(m=j.length;i<m;i++)l=j[i].nTr,f=j[i].anCells,l&&l.insertBefore(f[c],f[n]||null)}else h(D(b.aoData,"anCells",c)).detach();g.bVisible=a;fa(b,b.aoHeader);fa(b,b.aoFooter);b.aiDisplay.length||h(b.nTBody).find("td[colspan]").attr("colspan",V(b));xa(b)}});a!==k&&(this.iterator("column",function(c,e){r(c,null,"column-visibility",[c,e,a,b])}),(b===k||b)&&this.columns.adjust());return c});u("columns().indexes()","column().index()",
function(a){return this.iterator("column",function(b,c){return"visible"===a?ba(b,c):c},1)});o("columns.adjust()",function(){return this.iterator("table",function(a){$(a)},1)});o("column.index()",function(a,b){if(0!==this.context.length){var c=this.context[0];if("fromVisible"===a||"toData"===a)return aa(c,b);if("fromData"===a||"toVisible"===a)return ba(c,b)}});o("column()",function(a,b){return cb(this.columns(a,b))});o("cells()",function(a,b,c){h.isPlainObject(a)&&(a.row===k?(c=a,a=null):(c=b,b=null));
h.isPlainObject(b)&&(c=b,b=null);if(null===b||b===k)return this.iterator("table",function(b){var d=a,e=bb(c),f=b.aoData,g=Ba(b,e),j=Rb(ja(f,g,"anCells")),i=h([].concat.apply([],j)),l,m=b.aoColumns.length,n,o,u,s,r,v;return ab("cell",d,function(a){var c=typeof a==="function";if(a===null||a===k||c){n=[];o=0;for(u=g.length;o<u;o++){l=g[o];for(s=0;s<m;s++){r={row:l,column:s};if(c){v=f[l];a(r,B(b,l,s),v.anCells?v.anCells[s]:null)&&n.push(r)}else n.push(r)}}return n}if(h.isPlainObject(a))return a.column!==
k&&a.row!==k&&h.inArray(a.row,g)!==-1?[a]:[];c=i.filter(a).map(function(a,b){return{row:b._DT_CellIndex.row,column:b._DT_CellIndex.column}}).toArray();if(c.length||!a.nodeName)return c;v=h(a).closest("*[data-dt-row]");return v.length?[{row:v.data("dt-row"),column:v.data("dt-column")}]:[]},b,e)});var d=this.columns(b),e=this.rows(a),f,g,j,i,m;this.iterator("table",function(a,b){f=[];g=0;for(j=e[b].length;g<j;g++){i=0;for(m=d[b].length;i<m;i++)f.push({row:e[b][g],column:d[b][i]})}},1);var l=this.cells(f,
c);h.extend(l.selector,{cols:b,rows:a,opts:c});return l});u("cells().nodes()","cell().node()",function(){return this.iterator("cell",function(a,b,c){return(a=a.aoData[b])&&a.anCells?a.anCells[c]:k},1)});o("cells().data()",function(){return this.iterator("cell",function(a,b,c){return B(a,b,c)},1)});u("cells().cache()","cell().cache()",function(a){a="search"===a?"_aFilterData":"_aSortData";return this.iterator("cell",function(b,c,d){return b.aoData[c][a][d]},1)});u("cells().render()","cell().render()",
function(a){return this.iterator("cell",function(b,c,d){return B(b,c,d,a)},1)});u("cells().indexes()","cell().index()",function(){return this.iterator("cell",function(a,b,c){return{row:b,column:c,columnVisible:ba(a,c)}},1)});u("cells().invalidate()","cell().invalidate()",function(a){return this.iterator("cell",function(b,c,d){da(b,c,a,d)})});o("cell()",function(a,b,c){return cb(this.cells(a,b,c))});o("cell().data()",function(a){var b=this.context,c=this[0];if(a===k)return b.length&&c.length?B(b[0],
c[0].row,c[0].column):k;kb(b[0],c[0].row,c[0].column,a);da(b[0],c[0].row,"data",c[0].column);return this});o("order()",function(a,b){var c=this.context;if(a===k)return 0!==c.length?c[0].aaSorting:k;"number"===typeof a?a=[[a,b]]:a.length&&!h.isArray(a[0])&&(a=Array.prototype.slice.call(arguments));return this.iterator("table",function(b){b.aaSorting=a.slice()})});o("order.listener()",function(a,b,c){return this.iterator("table",function(d){Ma(d,a,b,c)})});o("order.fixed()",function(a){if(!a){var b=
this.context,b=b.length?b[0].aaSortingFixed:k;return h.isArray(b)?{pre:b}:b}return this.iterator("table",function(b){b.aaSortingFixed=h.extend(!0,{},a)})});o(["columns().order()","column().order()"],function(a){var b=this;return this.iterator("table",function(c,d){var e=[];h.each(b[d],function(b,c){e.push([c,a])});c.aaSorting=e})});o("search()",function(a,b,c,d){var e=this.context;return a===k?0!==e.length?e[0].oPreviousSearch.sSearch:k:this.iterator("table",function(e){e.oFeatures.bFilter&&ga(e,
h.extend({},e.oPreviousSearch,{sSearch:a+"",bRegex:null===b?!1:b,bSmart:null===c?!0:c,bCaseInsensitive:null===d?!0:d}),1)})});u("columns().search()","column().search()",function(a,b,c,d){return this.iterator("column",function(e,f){var g=e.aoPreSearchCols;if(a===k)return g[f].sSearch;e.oFeatures.bFilter&&(h.extend(g[f],{sSearch:a+"",bRegex:null===b?!1:b,bSmart:null===c?!0:c,bCaseInsensitive:null===d?!0:d}),ga(e,e.oPreviousSearch,1))})});o("state()",function(){return this.context.length?this.context[0].oSavedState:
null});o("state.clear()",function(){return this.iterator("table",function(a){a.fnStateSaveCallback.call(a.oInstance,a,{})})});o("state.loaded()",function(){return this.context.length?this.context[0].oLoadedState:null});o("state.save()",function(){return this.iterator("table",function(a){xa(a)})});n.versionCheck=n.fnVersionCheck=function(a){for(var b=n.version.split("."),a=a.split("."),c,d,e=0,f=a.length;e<f;e++)if(c=parseInt(b[e],10)||0,d=parseInt(a[e],10)||0,c!==d)return c>d;return!0};n.isDataTable=
n.fnIsDataTable=function(a){var b=h(a).get(0),c=!1;if(a instanceof n.Api)return!0;h.each(n.settings,function(a,e){var f=e.nScrollHead?h("table",e.nScrollHead)[0]:null,g=e.nScrollFoot?h("table",e.nScrollFoot)[0]:null;if(e.nTable===b||f===b||g===b)c=!0});return c};n.tables=n.fnTables=function(a){var b=!1;h.isPlainObject(a)&&(b=a.api,a=a.visible);var c=h.map(n.settings,function(b){if(!a||a&&h(b.nTable).is(":visible"))return b.nTable});return b?new s(c):c};n.camelToHungarian=J;o("$()",function(a,b){var c=
this.rows(b).nodes(),c=h(c);return h([].concat(c.filter(a).toArray(),c.find(a).toArray()))});h.each(["on","one","off"],function(a,b){o(b+"()",function(){var a=Array.prototype.slice.call(arguments);a[0]=h.map(a[0].split(/\s/),function(a){return!a.match(/\.dt\b/)?a+".dt":a}).join(" ");var d=h(this.tables().nodes());d[b].apply(d,a);return this})});o("clear()",function(){return this.iterator("table",function(a){oa(a)})});o("settings()",function(){return new s(this.context,this.context)});o("init()",function(){var a=
this.context;return a.length?a[0].oInit:null});o("data()",function(){return this.iterator("table",function(a){return D(a.aoData,"_aData")}).flatten()});o("destroy()",function(a){a=a||!1;return this.iterator("table",function(b){var c=b.nTableWrapper.parentNode,d=b.oClasses,e=b.nTable,f=b.nTBody,g=b.nTHead,j=b.nTFoot,i=h(e),f=h(f),k=h(b.nTableWrapper),l=h.map(b.aoData,function(a){return a.nTr}),o;b.bDestroying=!0;r(b,"aoDestroyCallback","destroy",[b]);a||(new s(b)).columns().visible(!0);k.off(".DT").find(":not(tbody *)").off(".DT");
h(E).off(".DT-"+b.sInstance);e!=g.parentNode&&(i.children("thead").detach(),i.append(g));j&&e!=j.parentNode&&(i.children("tfoot").detach(),i.append(j));b.aaSorting=[];b.aaSortingFixed=[];wa(b);h(l).removeClass(b.asStripeClasses.join(" "));h("th, td",g).removeClass(d.sSortable+" "+d.sSortableAsc+" "+d.sSortableDesc+" "+d.sSortableNone);f.children().detach();f.append(l);g=a?"remove":"detach";i[g]();k[g]();!a&&c&&(c.insertBefore(e,b.nTableReinsertBefore),i.css("width",b.sDestroyWidth).removeClass(d.sTable),
(o=b.asDestroyStripes.length)&&f.children().each(function(a){h(this).addClass(b.asDestroyStripes[a%o])}));c=h.inArray(b,n.settings);-1!==c&&n.settings.splice(c,1)})});h.each(["column","row","cell"],function(a,b){o(b+"s().every()",function(a){var d=this.selector.opts,e=this;return this.iterator(b,function(f,g,h,i,m){a.call(e[b](g,"cell"===b?h:d,"cell"===b?d:k),g,h,i,m)})})});o("i18n()",function(a,b,c){var d=this.context[0],a=S(a)(d.oLanguage);a===k&&(a=b);c!==k&&h.isPlainObject(a)&&(a=a[c]!==k?a[c]:
a._);return a.replace("%d",c)});n.version="1.10.19";n.settings=[];n.models={};n.models.oSearch={bCaseInsensitive:!0,sSearch:"",bRegex:!1,bSmart:!0};n.models.oRow={nTr:null,anCells:null,_aData:[],_aSortData:null,_aFilterData:null,_sFilterRow:null,_sRowStripe:"",src:null,idx:-1};n.models.oColumn={idx:null,aDataSort:null,asSorting:null,bSearchable:null,bSortable:null,bVisible:null,_sManualType:null,_bAttrSrc:!1,fnCreatedCell:null,fnGetData:null,fnSetData:null,mData:null,mRender:null,nTh:null,nTf:null,
sClass:null,sContentPadding:null,sDefaultContent:null,sName:null,sSortDataType:"std",sSortingClass:null,sSortingClassJUI:null,sTitle:null,sType:null,sWidth:null,sWidthOrig:null};n.defaults={aaData:null,aaSorting:[[0,"asc"]],aaSortingFixed:[],ajax:null,aLengthMenu:[10,25,50,100],aoColumns:null,aoColumnDefs:null,aoSearchCols:[],asStripeClasses:null,bAutoWidth:!0,bDeferRender:!1,bDestroy:!1,bFilter:!0,bInfo:!0,bLengthChange:!0,bPaginate:!0,bProcessing:!1,bRetrieve:!1,bScrollCollapse:!1,bServerSide:!1,
bSort:!0,bSortMulti:!0,bSortCellsTop:!1,bSortClasses:!0,bStateSave:!1,fnCreatedRow:null,fnDrawCallback:null,fnFooterCallback:null,fnFormatNumber:function(a){return a.toString().replace(/\B(?=(\d{3})+(?!\d))/g,this.oLanguage.sThousands)},fnHeaderCallback:null,fnInfoCallback:null,fnInitComplete:null,fnPreDrawCallback:null,fnRowCallback:null,fnServerData:null,fnServerParams:null,fnStateLoadCallback:function(a){try{return JSON.parse((-1===a.iStateDuration?sessionStorage:localStorage).getItem("DataTables_"+
a.sInstance+"_"+location.pathname))}catch(b){}},fnStateLoadParams:null,fnStateLoaded:null,fnStateSaveCallback:function(a,b){try{(-1===a.iStateDuration?sessionStorage:localStorage).setItem("DataTables_"+a.sInstance+"_"+location.pathname,JSON.stringify(b))}catch(c){}},fnStateSaveParams:null,iStateDuration:7200,iDeferLoading:null,iDisplayLength:10,iDisplayStart:0,iTabIndex:0,oClasses:{},oLanguage:{oAria:{sSortAscending:": activate to sort column ascending",sSortDescending:": activate to sort column descending"},
oPaginate:{sFirst:"First",sLast:"Last",sNext:"Next",sPrevious:"Previous"},sEmptyTable:"No data available in table",sInfo:"Showing _START_ to _END_ of _TOTAL_ entries",sInfoEmpty:"Showing 0 to 0 of 0 entries",sInfoFiltered:"(filtered from _MAX_ total entries)",sInfoPostFix:"",sDecimal:"",sThousands:",",sLengthMenu:"Show _MENU_ entries",sLoadingRecords:"Loading...",sProcessing:"Processing...",sSearch:"Search:",sSearchPlaceholder:"",sUrl:"",sZeroRecords:"No matching records found"},oSearch:h.extend({},
n.models.oSearch),sAjaxDataProp:"data",sAjaxSource:null,sDom:"lfrtip",searchDelay:null,sPaginationType:"simple_numbers",sScrollX:"",sScrollXInner:"",sScrollY:"",sServerMethod:"GET",renderer:null,rowId:"DT_RowId"};Z(n.defaults);n.defaults.column={aDataSort:null,iDataSort:-1,asSorting:["asc","desc"],bSearchable:!0,bSortable:!0,bVisible:!0,fnCreatedCell:null,mData:null,mRender:null,sCellType:"td",sClass:"",sContentPadding:"",sDefaultContent:null,sName:"",sSortDataType:"std",sTitle:null,sType:null,sWidth:null};
Z(n.defaults.column);n.models.oSettings={oFeatures:{bAutoWidth:null,bDeferRender:null,bFilter:null,bInfo:null,bLengthChange:null,bPaginate:null,bProcessing:null,bServerSide:null,bSort:null,bSortMulti:null,bSortClasses:null,bStateSave:null},oScroll:{bCollapse:null,iBarWidth:0,sX:null,sXInner:null,sY:null},oLanguage:{fnInfoCallback:null},oBrowser:{bScrollOversize:!1,bScrollbarLeft:!1,bBounding:!1,barWidth:0},ajax:null,aanFeatures:[],aoData:[],aiDisplay:[],aiDisplayMaster:[],aIds:{},aoColumns:[],aoHeader:[],
aoFooter:[],oPreviousSearch:{},aoPreSearchCols:[],aaSorting:null,aaSortingFixed:[],asStripeClasses:null,asDestroyStripes:[],sDestroyWidth:0,aoRowCallback:[],aoHeaderCallback:[],aoFooterCallback:[],aoDrawCallback:[],aoRowCreatedCallback:[],aoPreDrawCallback:[],aoInitComplete:[],aoStateSaveParams:[],aoStateLoadParams:[],aoStateLoaded:[],sTableId:"",nTable:null,nTHead:null,nTFoot:null,nTBody:null,nTableWrapper:null,bDeferLoading:!1,bInitialised:!1,aoOpenRows:[],sDom:null,searchDelay:null,sPaginationType:"two_button",
iStateDuration:0,aoStateSave:[],aoStateLoad:[],oSavedState:null,oLoadedState:null,sAjaxSource:null,sAjaxDataProp:null,bAjaxDataGet:!0,jqXHR:null,json:k,oAjaxData:k,fnServerData:null,aoServerParams:[],sServerMethod:null,fnFormatNumber:null,aLengthMenu:null,iDraw:0,bDrawing:!1,iDrawError:-1,_iDisplayLength:10,_iDisplayStart:0,_iRecordsTotal:0,_iRecordsDisplay:0,oClasses:{},bFiltered:!1,bSorted:!1,bSortCellsTop:null,oInit:null,aoDestroyCallback:[],fnRecordsTotal:function(){return"ssp"==y(this)?1*this._iRecordsTotal:
this.aiDisplayMaster.length},fnRecordsDisplay:function(){return"ssp"==y(this)?1*this._iRecordsDisplay:this.aiDisplay.length},fnDisplayEnd:function(){var a=this._iDisplayLength,b=this._iDisplayStart,c=b+a,d=this.aiDisplay.length,e=this.oFeatures,f=e.bPaginate;return e.bServerSide?!1===f||-1===a?b+d:Math.min(b+a,this._iRecordsDisplay):!f||c>d||-1===a?d:c},oInstance:null,sInstance:null,iTabIndex:0,nScrollHead:null,nScrollFoot:null,aLastSort:[],oPlugins:{},rowIdFn:null,rowId:null};n.ext=x={buttons:{},
classes:{},builder:"-source-",errMode:"alert",feature:[],search:[],selector:{cell:[],column:[],row:[]},internal:{},legacy:{ajax:null},pager:{},renderer:{pageButton:{},header:{}},order:{},type:{detect:[],search:{},order:{}},_unique:0,fnVersionCheck:n.fnVersionCheck,iApiIndex:0,oJUIClasses:{},sVersion:n.version};h.extend(x,{afnFiltering:x.search,aTypes:x.type.detect,ofnSearch:x.type.search,oSort:x.type.order,afnSortData:x.order,aoFeatures:x.feature,oApi:x.internal,oStdClasses:x.classes,oPagination:x.pager});
h.extend(n.ext.classes,{sTable:"dataTable",sNoFooter:"no-footer",sPageButton:"paginate_button",sPageButtonActive:"current",sPageButtonDisabled:"disabled",sStripeOdd:"odd",sStripeEven:"even",sRowEmpty:"dataTables_empty",sWrapper:"dataTables_wrapper",sFilter:"dataTables_filter",sInfo:"dataTables_info",sPaging:"dataTables_paginate paging_",sLength:"dataTables_length",sProcessing:"dataTables_processing",sSortAsc:"sorting_asc",sSortDesc:"sorting_desc",sSortable:"sorting",sSortableAsc:"sorting_asc_disabled",
sSortableDesc:"sorting_desc_disabled",sSortableNone:"sorting_disabled",sSortColumn:"sorting_",sFilterInput:"",sLengthSelect:"",sScrollWrapper:"dataTables_scroll",sScrollHead:"dataTables_scrollHead",sScrollHeadInner:"dataTables_scrollHeadInner",sScrollBody:"dataTables_scrollBody",sScrollFoot:"dataTables_scrollFoot",sScrollFootInner:"dataTables_scrollFootInner",sHeaderTH:"",sFooterTH:"",sSortJUIAsc:"",sSortJUIDesc:"",sSortJUI:"",sSortJUIAscAllowed:"",sSortJUIDescAllowed:"",sSortJUIWrapper:"",sSortIcon:"",
sJUIHeader:"",sJUIFooter:""});var Lb=n.ext.pager;h.extend(Lb,{simple:function(){return["previous","next"]},full:function(){return["first","previous","next","last"]},numbers:function(a,b){return[ia(a,b)]},simple_numbers:function(a,b){return["previous",ia(a,b),"next"]},full_numbers:function(a,b){return["first","previous",ia(a,b),"next","last"]},first_last_numbers:function(a,b){return["first",ia(a,b),"last"]},_numbers:ia,numbers_length:7});h.extend(!0,n.ext.renderer,{pageButton:{_:function(a,b,c,d,e,
f){var g=a.oClasses,j=a.oLanguage.oPaginate,i=a.oLanguage.oAria.paginate||{},m,l,n=0,o=function(b,d){var k,s,u,r,v=function(b){Ta(a,b.data.action,true)};k=0;for(s=d.length;k<s;k++){r=d[k];if(h.isArray(r)){u=h("<"+(r.DT_el||"div")+"/>").appendTo(b);o(u,r)}else{m=null;l="";switch(r){case "ellipsis":b.append('<span class="ellipsis">&#x2026;</span>');break;case "first":m=j.sFirst;l=r+(e>0?"":" "+g.sPageButtonDisabled);break;case "previous":m=j.sPrevious;l=r+(e>0?"":" "+g.sPageButtonDisabled);break;case "next":m=
j.sNext;l=r+(e<f-1?"":" "+g.sPageButtonDisabled);break;case "last":m=j.sLast;l=r+(e<f-1?"":" "+g.sPageButtonDisabled);break;default:m=r+1;l=e===r?g.sPageButtonActive:""}if(m!==null){u=h("<a>",{"class":g.sPageButton+" "+l,"aria-controls":a.sTableId,"aria-label":i[r],"data-dt-idx":n,tabindex:a.iTabIndex,id:c===0&&typeof r==="string"?a.sTableId+"_"+r:null}).html(m).appendTo(b);Wa(u,{action:r},v);n++}}}},s;try{s=h(b).find(H.activeElement).data("dt-idx")}catch(u){}o(h(b).empty(),d);s!==k&&h(b).find("[data-dt-idx="+
s+"]").focus()}}});h.extend(n.ext.type.detect,[function(a,b){var c=b.oLanguage.sDecimal;return $a(a,c)?"num"+c:null},function(a){if(a&&!(a instanceof Date)&&!Zb.test(a))return null;var b=Date.parse(a);return null!==b&&!isNaN(b)||M(a)?"date":null},function(a,b){var c=b.oLanguage.sDecimal;return $a(a,c,!0)?"num-fmt"+c:null},function(a,b){var c=b.oLanguage.sDecimal;return Qb(a,c)?"html-num"+c:null},function(a,b){var c=b.oLanguage.sDecimal;return Qb(a,c,!0)?"html-num-fmt"+c:null},function(a){return M(a)||
"string"===typeof a&&-1!==a.indexOf("<")?"html":null}]);h.extend(n.ext.type.search,{html:function(a){return M(a)?a:"string"===typeof a?a.replace(Nb," ").replace(Aa,""):""},string:function(a){return M(a)?a:"string"===typeof a?a.replace(Nb," "):a}});var za=function(a,b,c,d){if(0!==a&&(!a||"-"===a))return-Infinity;b&&(a=Pb(a,b));a.replace&&(c&&(a=a.replace(c,"")),d&&(a=a.replace(d,"")));return 1*a};h.extend(x.type.order,{"date-pre":function(a){a=Date.parse(a);return isNaN(a)?-Infinity:a},"html-pre":function(a){return M(a)?
"":a.replace?a.replace(/<.*?>/g,"").toLowerCase():a+""},"string-pre":function(a){return M(a)?"":"string"===typeof a?a.toLowerCase():!a.toString?"":a.toString()},"string-asc":function(a,b){return a<b?-1:a>b?1:0},"string-desc":function(a,b){return a<b?1:a>b?-1:0}});Da("");h.extend(!0,n.ext.renderer,{header:{_:function(a,b,c,d){h(a.nTable).on("order.dt.DT",function(e,f,g,h){if(a===f){e=c.idx;b.removeClass(c.sSortingClass+" "+d.sSortAsc+" "+d.sSortDesc).addClass(h[e]=="asc"?d.sSortAsc:h[e]=="desc"?d.sSortDesc:
c.sSortingClass)}})},jqueryui:function(a,b,c,d){h("<div/>").addClass(d.sSortJUIWrapper).append(b.contents()).append(h("<span/>").addClass(d.sSortIcon+" "+c.sSortingClassJUI)).appendTo(b);h(a.nTable).on("order.dt.DT",function(e,f,g,h){if(a===f){e=c.idx;b.removeClass(d.sSortAsc+" "+d.sSortDesc).addClass(h[e]=="asc"?d.sSortAsc:h[e]=="desc"?d.sSortDesc:c.sSortingClass);b.find("span."+d.sSortIcon).removeClass(d.sSortJUIAsc+" "+d.sSortJUIDesc+" "+d.sSortJUI+" "+d.sSortJUIAscAllowed+" "+d.sSortJUIDescAllowed).addClass(h[e]==
"asc"?d.sSortJUIAsc:h[e]=="desc"?d.sSortJUIDesc:c.sSortingClassJUI)}})}}});var eb=function(a){return"string"===typeof a?a.replace(/</g,"&lt;").replace(/>/g,"&gt;").replace(/"/g,"&quot;"):a};n.render={number:function(a,b,c,d,e){return{display:function(f){if("number"!==typeof f&&"string"!==typeof f)return f;var g=0>f?"-":"",h=parseFloat(f);if(isNaN(h))return eb(f);h=h.toFixed(c);f=Math.abs(h);h=parseInt(f,10);f=c?b+(f-h).toFixed(c).substring(2):"";return g+(d||"")+h.toString().replace(/\B(?=(\d{3})+(?!\d))/g,
a)+f+(e||"")}}},text:function(){return{display:eb,filter:eb}}};h.extend(n.ext.internal,{_fnExternApiFunc:Mb,_fnBuildAjax:sa,_fnAjaxUpdate:mb,_fnAjaxParameters:vb,_fnAjaxUpdateDraw:wb,_fnAjaxDataSrc:ta,_fnAddColumn:Ea,_fnColumnOptions:ka,_fnAdjustColumnSizing:$,_fnVisibleToColumnIndex:aa,_fnColumnIndexToVisible:ba,_fnVisbleColumns:V,_fnGetColumns:ma,_fnColumnTypes:Ga,_fnApplyColumnDefs:jb,_fnHungarianMap:Z,_fnCamelToHungarian:J,_fnLanguageCompat:Ca,_fnBrowserDetect:hb,_fnAddData:O,_fnAddTr:na,_fnNodeToDataIndex:function(a,
b){return b._DT_RowIndex!==k?b._DT_RowIndex:null},_fnNodeToColumnIndex:function(a,b,c){return h.inArray(c,a.aoData[b].anCells)},_fnGetCellData:B,_fnSetCellData:kb,_fnSplitObjNotation:Ja,_fnGetObjectDataFn:S,_fnSetObjectDataFn:N,_fnGetDataMaster:Ka,_fnClearTable:oa,_fnDeleteIndex:pa,_fnInvalidate:da,_fnGetRowElements:Ia,_fnCreateTr:Ha,_fnBuildHead:lb,_fnDrawHead:fa,_fnDraw:P,_fnReDraw:T,_fnAddOptionsHtml:ob,_fnDetectHeader:ea,_fnGetUniqueThs:ra,_fnFeatureHtmlFilter:qb,_fnFilterComplete:ga,_fnFilterCustom:zb,
_fnFilterColumn:yb,_fnFilter:xb,_fnFilterCreateSearch:Pa,_fnEscapeRegex:Qa,_fnFilterData:Ab,_fnFeatureHtmlInfo:tb,_fnUpdateInfo:Db,_fnInfoMacros:Eb,_fnInitialise:ha,_fnInitComplete:ua,_fnLengthChange:Ra,_fnFeatureHtmlLength:pb,_fnFeatureHtmlPaginate:ub,_fnPageChange:Ta,_fnFeatureHtmlProcessing:rb,_fnProcessingDisplay:C,_fnFeatureHtmlTable:sb,_fnScrollDraw:la,_fnApplyToChildren:I,_fnCalculateColumnWidths:Fa,_fnThrottle:Oa,_fnConvertToWidth:Fb,_fnGetWidestNode:Gb,_fnGetMaxLenString:Hb,_fnStringToCss:v,
_fnSortFlatten:X,_fnSort:nb,_fnSortAria:Jb,_fnSortListener:Va,_fnSortAttachListener:Ma,_fnSortingClasses:wa,_fnSortData:Ib,_fnSaveState:xa,_fnLoadState:Kb,_fnSettingsFromNode:ya,_fnLog:K,_fnMap:F,_fnBindAction:Wa,_fnCallbackReg:z,_fnCallbackFire:r,_fnLengthOverflow:Sa,_fnRenderer:Na,_fnDataSource:y,_fnRowAttributes:La,_fnExtend:Xa,_fnCalculateEnd:function(){}});h.fn.dataTable=n;n.$=h;h.fn.dataTableSettings=n.settings;h.fn.dataTableExt=n.ext;h.fn.DataTable=function(a){return h(this).dataTable(a).api()};
h.each(n,function(a,b){h.fn.DataTable[a]=b});return h.fn.dataTable});

File diff suppressed because one or more lines are too long

Binary file not shown.

Before

Width:  |  Height:  |  Size: 158 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 201 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 157 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

View file

@ -1,11 +1,11 @@
__package__ = 'archivebox'
import re
from pathlib import Path
import requests
import json as pyjson
from typing import List, Optional, Any
from pathlib import Path
from inspect import signature
from functools import wraps
from hashlib import sha256
@ -13,10 +13,9 @@ from urllib.parse import urlparse, quote, unquote
from html import escape, unescape
from datetime import datetime
from dateparser import parse as dateparser
import requests
from requests.exceptions import RequestException, ReadTimeout
from .base32_crockford import encode as base32_encode # type: ignore
from .vendor.base32_crockford import encode as base32_encode # type: ignore
from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
try:
@ -246,6 +245,7 @@ def chrome_args(**options) -> List[str]:
return cmd_args
def ansi_to_html(text):
"""
Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html

0
archivebox/vendor/__init__.py vendored Normal file
View file

1
archivebox/vendor/base32-crockford vendored Submodule

@ -0,0 +1 @@
Subproject commit 1ffb6021485b666ea6a562abd0a1ea6f7021188f

1
archivebox/vendor/base32_crockford.py vendored Symbolic link
View file

@ -0,0 +1 @@
base32-crockford/base32_crockford.py

1
archivebox/vendor/django-taggit vendored Submodule

@ -0,0 +1 @@
Subproject commit 1e4dca37e534ca70e99c39fb4198970eb8aad5aa

1
archivebox/vendor/pocket vendored Submodule

@ -0,0 +1 @@
Subproject commit 3a0c5c76832b0e92923383af3f9831ece7901c2f

1
archivebox/vendor/pocket.py vendored Symbolic link
View file

@ -0,0 +1 @@
pocket/pocket.py

1
archivebox/vendor/taggit_utils.py vendored Symbolic link
View file

@ -0,0 +1 @@
django-taggit/taggit/utils.py

View file

@ -16,9 +16,11 @@ cd "$REPO_DIR"
# pipenv install --dev
# the order matters
./bin/build_docs.sh
./bin/build_pip.sh
./bin/build_deb.sh
./bin/build_brew.sh
./bin/build_docker.sh
echo "[√] Done. Install the built package by running:"

29
bin/build_brew.sh Executable file
View file

@ -0,0 +1,29 @@
#!/usr/bin/env bash
### Bash Environment Setup
# http://redsymbol.net/articles/unofficial-bash-strict-mode/
# https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html
# set -o xtrace
set -o errexit
set -o errtrace
set -o nounset
set -o pipefail
IFS=$'\n'
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
CURRENT_PLAFORM="$(uname)"
REQUIRED_PLATFORM="Darwin"
if [[ "$CURRENT_PLAFORM" != "$REQUIRED_PLATFORM" ]]; then
echo "[!] Skipping the Homebrew package build on $CURRENT_PLAFORM (it can only be run on $REQUIRED_PLATFORM)."
exit 0
fi
cd "$REPO_DIR/brew_dist"
# make sure archivebox.rb is up-to-date with the dependencies
echo "[+] Building Homebrew bottle"
brew install --build-bottle ./archivebox.rb
brew bottle archivebox

View file

@ -12,9 +12,20 @@ IFS=$'\n'
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
source "$REPO_DIR/.venv/bin/activate"
if [[ -f "$REPO_DIR/.venv/bin/activate" ]]; then
source "$REPO_DIR/.venv/bin/activate"
else
echo "[!] Warning: No virtualenv presesnt in $REPO_DIR.venv"
fi
cd "$REPO_DIR"
CURRENT_PLAFORM="$(uname)"
REQUIRED_PLATFORM="Linux"
if [[ "$CURRENT_PLAFORM" != "$REQUIRED_PLATFORM" ]]; then
echo "[!] Skipping the Debian package build on $CURRENT_PLAFORM (it can only be run on $REQUIRED_PLATFORM)."
exit 0
fi
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
DEBIAN_VERSION="1"
PGP_KEY_ID="7D5695D3B618872647861D51C38137A7C1675988"
@ -30,6 +41,8 @@ PGP_KEY_ID="7D5695D3B618872647861D51C38137A7C1675988"
# cleanup build artifacts
rm -Rf build deb_dist dist archivebox-*.tar.gz
# make sure the stdeb.cfg file is up-to-date with all the dependencies
# build source and binary packages
python3 setup.py --command-packages=stdeb.command \
sdist_dsc --debian-version=$DEBIAN_VERSION \

View file

@ -14,6 +14,7 @@ REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && p
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
cd "$REPO_DIR"
which docker > /dev/null
echo "[+] Building docker image in the background..."
docker build . -t archivebox \

View file

@ -12,7 +12,11 @@ IFS=$'\n'
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
source "$REPO_DIR/.venv/bin/activate"
if [[ -f "$REPO_DIR/.venv/bin/activate" ]]; then
source "$REPO_DIR/.venv/bin/activate"
else
echo "[!] Warning: No virtualenv presesnt in $REPO_DIR.venv"
fi
cd "$REPO_DIR"

View file

@ -12,13 +12,20 @@ IFS=$'\n'
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
source "$REPO_DIR/.venv/bin/activate"
if [[ -f "$REPO_DIR/.venv/bin/activate" ]]; then
source "$REPO_DIR/.venv/bin/activate"
else
echo "[!] Warning: No virtualenv presesnt in $REPO_DIR.venv"
fi
cd "$REPO_DIR"
echo "[*] Cleaning up build dirs"
cd "$REPO_DIR"
rm -Rf build dist archivebox.egg-info
rm -Rf build dist
echo "[+] Building sdist, bdist_egg, and bdist_wheel"
python3 setup.py sdist bdist_egg bdist_wheel
echo "[+] Building sdist, bdist_wheel, and egg_info"
python3 setup.py \
sdist --dist-dir=./pip_dist \
bdist_wheel --dist-dir=./pip_dist \
egg_info --egg-base=./pip_dist

View file

@ -48,6 +48,7 @@ echo "${contents}" > package.json
# Push build to github
echo "[^] Pushing source to github"
git add "$REPO_DIR/docs"
git add "$REPO_DIR/deb_dist"
git add "$REPO_DIR/package.json"
git add "$REPO_DIR/package-lock.json"
git add "$REPO_DIR/archivebox.egg-info"
@ -59,10 +60,10 @@ git push origin --tags
# Push releases to github
echo "[^] Uploading to test.pypi.org"
python3 -m twine upload --repository testpypi dist/*
python3 -m twine upload --repository testpypi pip_dist/*.{whl,tar.gz}
echo "[^] Uploading to pypi.org"
python3 -m twine upload --repository pypi dist/*
python3 -m twine upload --repository pypi pip_dist/*.{whl,tar.gz}
echo "[^] Uploading to launchpad.net"
dput archivebox "deb_dist/archivebox_${NEW_VERSION}-1_source.changes"

View file

@ -14,4 +14,4 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
source "$DIR/.venv/bin/activate"
pytest -s
pytest -s --basetemp=tests/out --ignore=archivebox/vendor

1
brew_dist Submodule

@ -0,0 +1 @@
Subproject commit 55f57fcc0e5e7d0b1c0b93cef537cc97936b2848

1
deb_dist Submodule

@ -0,0 +1 @@
Subproject commit cd7f47d48e487c5192670cd5b68042d41b05d281

1
docker Submodule

@ -0,0 +1 @@
Subproject commit 236f7881e3105b218864d9b3185b17c44b306106

View file

@ -21,9 +21,23 @@ services:
environment:
- USE_COLOR=True
- SHOW_PROGRESS=False
- SEARCH_BACKEND_ENGINE=sonic
- SEARCH_BACKEND_HOST_NAME=sonic
volumes:
- ./data:/data
depends_on:
- sonic
# Run sonic search backend
sonic:
image: valeriansaliou/sonic:v1.3.0
ports:
- 1491:1491
environment:
- SEARCH_BACKEND_PASSWORD=SecretPassword
volumes:
- ./etc/sonic/config.cfg:/etc/sonic.cfg
- ./data:/var/lib/sonic/store/
# Optional Addons: tweak these examples as needed for your specific use case
@ -73,3 +87,4 @@ services:
# volumes:
# ./data:/archivebox
# ./data/wayback:/webarchive

2
docs

@ -1 +1 @@
Subproject commit d5071d92367a91bb585abb5da7c65ebc61d0d7b0
Subproject commit 6228411cb63872fb88bc07a0f7be43b7f535337b

66
etc/sonic/config.cfg Normal file
View file

@ -0,0 +1,66 @@
# Sonic
# Fast, lightweight and schema-less search backend
# Configuration file
# Example: https://github.com/valeriansaliou/sonic/blob/master/config.cfg
[server]
log_level = "info"
[channel]
inet = "0.0.0.0:1491"
tcp_timeout = 300
auth_password = "${env.SEARCH_BACKEND_PASSWORD}"
[channel.search]
query_limit_default = 65535
query_limit_maximum = 65535
query_alternates_try = 10
suggest_limit_default = 5
suggest_limit_maximum = 20
[store]
[store.kv]
path = "/var/lib/sonic/store/kv/"
retain_word_objects = 100000
[store.kv.pool]
inactive_after = 1800
[store.kv.database]
flush_after = 900
compress = true
parallelism = 2
max_files = 100
max_compactions = 1
max_flushes = 1
write_buffer = 16384
write_ahead_log = true
[store.fst]
path = "/var/lib/sonic/store/fst/"
[store.fst.pool]
inactive_after = 300
[store.fst.graph]
consolidate_after = 180
max_size = 2048
max_words = 250000

View file

@ -1,6 +1,6 @@
{
"name": "archivebox",
"version": "0.4.24",
"version": "0.5.1",
"description": "ArchiveBox: The self-hosted internet archive",
"author": "Nick Sweeting <archivebox-npm@sweeting.me>",
"license": "MIT",

Some files were not shown because too many files have changed in this diff Show more