Merge pull request #552 from ArchiveBox/v0.5.0
|
@ -3,6 +3,8 @@
|
|||
*.pyc
|
||||
__pycache__/
|
||||
.mypy_cache/
|
||||
.pytest_cache/
|
||||
.github/
|
||||
|
||||
venv/
|
||||
.venv/
|
||||
|
@ -10,6 +12,10 @@ venv/
|
|||
|
||||
build/
|
||||
dist/
|
||||
pip_dist/
|
||||
!pip_dist/archivebox.egg-info/requires.txt
|
||||
brew_dist/
|
||||
assets/
|
||||
|
||||
data/
|
||||
output/
|
||||
|
|
76
.github/workflows/debian.yml
vendored
Normal file
|
@ -0,0 +1,76 @@
|
|||
name: Build Debian package
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
|
||||
env:
|
||||
DEB_BUILD_OPTIONS: nocheck
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-20.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Install packaging dependencies
|
||||
run: |
|
||||
sudo apt install -y \
|
||||
python3 python3-dev python3-pip python3-venv python3-all \
|
||||
dh-python debhelper devscripts dput software-properties-common \
|
||||
python3-distutils python3-setuptools python3-wheel python3-stdeb
|
||||
|
||||
- name: Build Debian/Apt sdist_dsc
|
||||
run: |
|
||||
rm -Rf deb_dist/*
|
||||
python3 setup.py --command-packages=stdeb.command sdist_dsc
|
||||
|
||||
- name: Build Debian/Apt bdist_deb
|
||||
run: |
|
||||
python3 setup.py --command-packages=stdeb.command bdist_deb
|
||||
|
||||
- name: Install archivebox from deb
|
||||
run: |
|
||||
cd deb_dist/
|
||||
sudo apt install ./archivebox*.deb
|
||||
|
||||
- name: Check ArchiveBox version
|
||||
run: |
|
||||
# must create dir needed for snaps to run as non-root on github actions
|
||||
sudo mkdir -p /run/user/1001 && sudo chmod -R 777 /run/user/1001
|
||||
mkdir "${{ github.workspace }}/data" && cd "${{ github.workspace }}/data"
|
||||
archivebox init
|
||||
archivebox config --set SAVE_READABILITY=False
|
||||
archivebox config --set SAVE_MERCURY=False
|
||||
archivebox config --set SAVE_SINGLEFILE=False
|
||||
archivebox --version
|
||||
|
||||
- name: Add some links to test
|
||||
run: |
|
||||
cd "${{ github.workspace }}/data"
|
||||
archivebox add 'https://example.com'
|
||||
archivebox status
|
||||
|
||||
# - name: Commit built package
|
||||
# run: |
|
||||
# cd deb_dist/
|
||||
# git config --local user.email "action@github.com"
|
||||
# git config --local user.name "GitHub Action"
|
||||
# git commit -m "Debian package autobuild" -a
|
||||
|
||||
# - name: Push build to Github
|
||||
# uses: ad-m/github-push-action@master
|
||||
# with:
|
||||
# github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
# repository: ArchiveBox/debian-archivebox
|
||||
# branch: ${{ github.ref }}
|
||||
# directory: deb_dist
|
||||
|
||||
# - name: Push build to Launchpad PPA
|
||||
# run: |
|
||||
# debsign -k "$PGP_KEY_ID" "deb_dist/archivebox_${VERSION}-${DEBIAN_VERSION}_source.changes"
|
||||
# dput archivebox "deb_dist/archivebox_${VERSION}-${DEBIAN_VERSION}_source.changes"
|
17
.github/workflows/docker.yml
vendored
|
@ -1,4 +1,4 @@
|
|||
name: Docker Push
|
||||
name: Build Docker image
|
||||
|
||||
on:
|
||||
push:
|
||||
|
@ -8,6 +8,10 @@ on:
|
|||
types:
|
||||
- created
|
||||
|
||||
env:
|
||||
DOCKER_IMAGE: archivebox-ci
|
||||
|
||||
|
||||
jobs:
|
||||
buildx:
|
||||
runs-on: ubuntu-latest
|
||||
|
@ -17,20 +21,29 @@ jobs:
|
|||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v1
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v1
|
||||
with:
|
||||
version: latest
|
||||
install: true
|
||||
|
||||
- name: Builder instance name
|
||||
run: echo ${{ steps.buildx.outputs.name }}
|
||||
|
||||
- name: Available platforms
|
||||
run: echo ${{ steps.buildx.outputs.platforms }}
|
||||
|
||||
- name: Cache Docker layers
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
|
@ -38,6 +51,7 @@ jobs:
|
|||
key: ${{ runner.os }}-buildx-${{ github.sha }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-buildx-
|
||||
|
||||
- name: Build and push
|
||||
id: docker_build
|
||||
uses: docker/build-push-action@v2
|
||||
|
@ -54,5 +68,6 @@ jobs:
|
|||
cache-from: type=local,src=/tmp/.buildx-cache
|
||||
cache-to: type=local,dest=/tmp/.buildx-cache
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v7
|
||||
|
||||
- name: Image digest
|
||||
run: echo ${{ steps.docker_build.outputs.digest }}
|
||||
|
|
50
.github/workflows/homebrew.yml
vendored
Normal file
|
@ -0,0 +1,50 @@
|
|||
name: Build Homebrew package
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: macos-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 1
|
||||
|
||||
# TODO: modify archivebox.rb to update src url, hashes, and dependencies
|
||||
|
||||
- name: Build Homebrew Bottle
|
||||
run: |
|
||||
pip3 install --upgrade pip setuptools wheel
|
||||
cd brew_dist/
|
||||
brew install --build-bottle ./archivebox.rb
|
||||
# brew bottle archivebox
|
||||
|
||||
- name: Add some links to test
|
||||
run: |
|
||||
mkdir data && cd data
|
||||
archivebox init
|
||||
archivebox add 'https://example.com'
|
||||
archivebox version
|
||||
archivebox status
|
||||
|
||||
# - name: Commit built package
|
||||
# run: |
|
||||
# cd brew_dist/
|
||||
# git config --local user.email "action@github.com"
|
||||
# git config --local user.name "GitHub Action"
|
||||
# git commit -m "Homebrew package autobuild" -a
|
||||
|
||||
# - name: Push build to Github
|
||||
# uses: ad-m/github-push-action@master
|
||||
# with:
|
||||
# github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
# repository: ArchiveBox/homebrew-archivebox
|
||||
# branch: ${{ github.ref }}
|
||||
# directory: brew_dist
|
||||
|
||||
# TODO: push bottle homebrew core PR with latest changes
|
34
.github/workflows/lint.yml
vendored
Normal file
|
@ -0,0 +1,34 @@
|
|||
name: Run linters
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
|
||||
env:
|
||||
MAX_LINE_LENGTH: 110
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v1
|
||||
with:
|
||||
python-version: 3.9
|
||||
architecture: x64
|
||||
|
||||
- name: Install flake8
|
||||
run: |
|
||||
pip install flake8
|
||||
|
||||
- name: Lint with flake8
|
||||
run: |
|
||||
# one pass for show-stopper syntax errors or undefined names
|
||||
flake8 archivebox --count --show-source --statistics
|
||||
# one pass for small stylistic things
|
||||
flake8 archivebox --count --max-line-length="$MAX_LINE_LENGTH" --statistics
|
61
.github/workflows/pip.yml
vendored
Normal file
|
@ -0,0 +1,61 @@
|
|||
name: Build pip package
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-20.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v1
|
||||
with:
|
||||
python-version: 3.9
|
||||
architecture: x64
|
||||
|
||||
- name: Build Python Package
|
||||
run: |
|
||||
pip3 install --upgrade pip setuptools wheel
|
||||
rm -Rf pip_dist/*.whl
|
||||
python3 setup.py \
|
||||
sdist --dist-dir=./pip_dist \
|
||||
bdist_wheel --dist-dir=./pip_dist \
|
||||
egg_info --egg-base=./pip_dist
|
||||
pip install pip_dist/archivebox-*.whl
|
||||
|
||||
- name: Add some links to test
|
||||
run: |
|
||||
mkdir data && cd data
|
||||
archivebox init
|
||||
archivebox add 'https://example.com'
|
||||
archivebox version
|
||||
archivebox status
|
||||
|
||||
# - name: Commit built package
|
||||
# run: |
|
||||
# cd pip_dist/
|
||||
# git config --local user.email "action@github.com"
|
||||
# git config --local user.name "GitHub Action"
|
||||
# git commit -m "Pip package autobuild" -a
|
||||
|
||||
# - name: Push build to Github
|
||||
# uses: ad-m/github-push-action@master
|
||||
# with:
|
||||
# github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
# repository: ArchiveBox/pip-archivebox
|
||||
# branch: ${{ github.ref }}
|
||||
# directory: pip_dist
|
||||
|
||||
# - name: Push build to PyPI
|
||||
# run: |
|
||||
# cd pip_dist/
|
||||
# python3 -m twine upload --repository testpypi pip_dist/*.{whl,tar.gz}
|
||||
# python3 -m twine upload --repository pypi pip_dist/*.{whl,tar.gz}
|
56
.github/workflows/test.yml
vendored
|
@ -1,44 +1,25 @@
|
|||
name: 'Lint, Test, and Build'
|
||||
name: Run tests
|
||||
on: [push]
|
||||
|
||||
env:
|
||||
MAX_LINE_LENGTH: 110
|
||||
DOCKER_IMAGE: archivebox-ci
|
||||
PYTHONIOENCODING: utf-8
|
||||
PYTHONLEGACYWINDOWSSTDIO: utf-8
|
||||
USE_COLOR: False
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v1
|
||||
with:
|
||||
python-version: 3.8
|
||||
architecture: x64
|
||||
|
||||
- name: Install flake8
|
||||
run: |
|
||||
pip install flake8
|
||||
|
||||
- name: Lint with flake8
|
||||
run: |
|
||||
# one pass for show-stopper syntax errors or undefined names
|
||||
flake8 archivebox --count --show-source --statistics
|
||||
# one pass for small stylistic things
|
||||
flake8 archivebox --count --max-line-length="$MAX_LINE_LENGTH" --statistics
|
||||
|
||||
test:
|
||||
python_tests:
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest]
|
||||
python: [3.7, 3.8]
|
||||
os: [ubuntu-20.04, macos-latest, windows-latest]
|
||||
python: [3.7]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 1
|
||||
|
||||
### Setup Python & JS Languages
|
||||
|
@ -70,8 +51,9 @@ jobs:
|
|||
|
||||
- name: Install pip dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip setuptools wheel pytest bottle
|
||||
./bin/build_pip.sh
|
||||
python -m pip install .
|
||||
python -m pip install pytest bottle
|
||||
|
||||
- name: Get npm cache dir
|
||||
id: npm-cache
|
||||
|
@ -98,19 +80,25 @@ jobs:
|
|||
- name: Directory listing for debugging
|
||||
run: |
|
||||
pwd
|
||||
ls -a ./
|
||||
ls
|
||||
|
||||
- name: Archivebox version
|
||||
run: |
|
||||
archivebox version
|
||||
|
||||
- name: Test built package with pytest
|
||||
# TODO: remove this exception for windows once we get tests passing on that platform
|
||||
if: ${{ !contains(matrix.os, 'windows') }}
|
||||
run: |
|
||||
python -m pytest -s
|
||||
python -m pytest -s --ignore=archivebox/vendor
|
||||
|
||||
docker-test:
|
||||
docker_tests:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 1
|
||||
|
||||
# TODO: as of 2020-11 this helper layer broke, upgrade and re-enable this once it's usable again
|
||||
|
@ -122,8 +110,8 @@ jobs:
|
|||
|
||||
- name: Init data dir
|
||||
run: |
|
||||
mkdir data
|
||||
docker run -v "$PWD"/data:/data "$DOCKER_IMAGE" init
|
||||
mkdir "${{ github.workspace }}/data"
|
||||
docker run -v "${{ github.workspace }}/data":/data "$DOCKER_IMAGE" init
|
||||
|
||||
- name: Run test server
|
||||
run: |
|
||||
|
@ -149,7 +137,7 @@ jobs:
|
|||
docker-compose up -d
|
||||
sleep 5
|
||||
curl --silent --location 'http://127.0.0.1:8000' | grep 'ArchiveBox'
|
||||
curl --silent --location 'http://127.0.0.1:8000/static/admin/js/jquery.init.js' | grep 'django.jQuery'
|
||||
curl --silent --location 'http://127.0.0.1:8000/static/admin/js/jquery.init.js' | grep 'window.django'
|
||||
|
||||
- name: Check added urls show up in index
|
||||
run: |
|
||||
|
|
3
.gitignore
vendored
|
@ -3,6 +3,7 @@
|
|||
*.pyc
|
||||
__pycache__/
|
||||
.mypy_cache/
|
||||
tests/out/
|
||||
|
||||
# Python and Node dependencies
|
||||
venv/
|
||||
|
@ -11,9 +12,9 @@ venv/
|
|||
node_modules/
|
||||
|
||||
# Packaging artifacts
|
||||
archivebox.egg-info
|
||||
archivebox-*.tar.gz
|
||||
build/
|
||||
deb_dist/
|
||||
dist/
|
||||
|
||||
# Data folders
|
||||
|
|
26
.gitmodules
vendored
|
@ -1,3 +1,25 @@
|
|||
[submodule "docs"]
|
||||
path = docs
|
||||
url = https://github.com/pirate/ArchiveBox.wiki.git
|
||||
path = docs
|
||||
url = https://github.com/ArchiveBox/ArchiveBox.wiki.git
|
||||
|
||||
[submodule "deb_dist"]
|
||||
path = deb_dist
|
||||
url = https://github.com/ArchiveBox/debian-archivebox.git
|
||||
[submodule "brew_dist"]
|
||||
path = brew_dist
|
||||
url = https://github.com/ArchiveBox/homebrew-archivebox.git
|
||||
[submodule "pip_dist"]
|
||||
path = pip_dist
|
||||
url = https://github.com/ArchiveBox/pip-archivebox.git
|
||||
[submodule "docker"]
|
||||
path = docker
|
||||
url = https://github.com/ArchiveBox/docker-archivebox.git
|
||||
[submodule "archivebox/vendor/base32-crockford"]
|
||||
path = archivebox/vendor/base32-crockford
|
||||
url = https://github.com/jbittel/base32-crockford
|
||||
[submodule "archivebox/vendor/pocket"]
|
||||
path = archivebox/vendor/pocket
|
||||
url = https://github.com/tapanpandita/pocket
|
||||
[submodule "archivebox/vendor/django-taggit"]
|
||||
path = archivebox/vendor/django-taggit
|
||||
url = https://github.com/jazzband/django-taggit
|
||||
|
|
24
Dockerfile
|
@ -7,7 +7,7 @@
|
|||
# docker run -v "$PWD/data":/data -it archivebox manage createsuperuser
|
||||
# docker run -v "$PWD/data":/data -p 8000:8000 archivebox server
|
||||
|
||||
FROM python:3.8-slim-buster
|
||||
FROM python:3.9-slim-buster
|
||||
|
||||
LABEL name="archivebox" \
|
||||
maintainer="Nick Sweeting <archivebox-docker@sweeting.me>" \
|
||||
|
@ -46,13 +46,20 @@ RUN apt-get update -qq \
|
|||
# Install apt dependencies
|
||||
RUN apt-get update -qq \
|
||||
&& apt-get install -qq -y --no-install-recommends \
|
||||
wget curl chromium git ffmpeg youtube-dl \
|
||||
wget curl chromium git ffmpeg youtube-dl ripgrep \
|
||||
fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install apt development dependencies
|
||||
# RUN apt-get install -qq \
|
||||
# && apt-get install -qq -y --no-install-recommends \
|
||||
# python3 python3-dev python3-pip python3-venv python3-all \
|
||||
# dh-python debhelper devscripts dput software-properties-common \
|
||||
# python3-distutils python3-setuptools python3-wheel python3-stdeb
|
||||
|
||||
# Install Node environment
|
||||
RUN curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add - \
|
||||
&& echo 'deb https://deb.nodesource.com/node_14.x buster main' >> /etc/apt/sources.list \
|
||||
&& echo 'deb https://deb.nodesource.com/node_15.x buster main' >> /etc/apt/sources.list \
|
||||
&& apt-get update -qq \
|
||||
&& apt-get install -qq -y --no-install-recommends \
|
||||
nodejs \
|
||||
|
@ -62,7 +69,6 @@ RUN curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add -
|
|||
WORKDIR "$NODE_DIR"
|
||||
ENV PATH="${PATH}:$NODE_DIR/node_modules/.bin" \
|
||||
npm_config_loglevel=error
|
||||
RUN npm install -g npm
|
||||
ADD ./package.json ./package.json
|
||||
ADD ./package-lock.json ./package-lock.json
|
||||
RUN npm ci
|
||||
|
@ -72,16 +78,17 @@ WORKDIR "$CODE_DIR"
|
|||
ENV PATH="${PATH}:$VENV_PATH/bin"
|
||||
RUN python -m venv --clear --symlinks "$VENV_PATH" \
|
||||
&& pip install --upgrade --quiet pip setuptools
|
||||
ADD ./archivebox.egg-info/requires.txt "$CODE_DIR/archivebox.egg-info/requires.txt"
|
||||
ADD ./pip_dist/archivebox.egg-info/requires.txt "$CODE_DIR/pip_dist/archivebox.egg-info/requires.txt"
|
||||
RUN apt-get update -qq \
|
||||
&& apt-get install -qq -y --no-install-recommends \
|
||||
build-essential python-dev python3-dev \
|
||||
&& grep -B 1000 -E '^$' "$CODE_DIR/archivebox.egg-info/requires.txt" | pip install --quiet -r /dev/stdin \
|
||||
&& grep -B 1000 -E '^$' "$CODE_DIR/pip_dist/archivebox.egg-info/requires.txt" | pip install --quiet -r /dev/stdin \
|
||||
&& pip install --quiet "sonic-client==0.0.5" \
|
||||
&& apt-get purge -y build-essential python-dev python3-dev \
|
||||
&& apt-get autoremove -y \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install ArchiveBox Python package
|
||||
# Install ArchiveBox Python package and its dependencies
|
||||
WORKDIR "$CODE_DIR"
|
||||
ADD . "$CODE_DIR"
|
||||
RUN pip install -e .
|
||||
|
@ -99,7 +106,8 @@ ENV IN_DOCKER=True \
|
|||
MERCURY_BINARY="$NODE_DIR/node_modules/.bin/mercury-parser"
|
||||
|
||||
# Print version for nice docker finish summary
|
||||
RUN archivebox version
|
||||
# RUN archivebox version
|
||||
RUN /app/bin/docker_entrypoint.sh archivebox version
|
||||
|
||||
# Open up the interfaces to the outside world
|
||||
VOLUME "$DATA_DIR"
|
||||
|
|
321
README.md
|
@ -26,62 +26,175 @@
|
|||
<hr/>
|
||||
</div>
|
||||
|
||||
ArchiveBox is a powerful self-hosted internet archiving solution written in Python 3. You feed it URLs of pages you want to archive, and it saves them to disk in a variety of formats depending on the configuration and the content it detects. ArchiveBox can be installed via [Docker](https://docs.docker.com/get-docker/) (recommended), [`apt`](https://launchpad.net/~archivebox/+archive/ubuntu/archivebox/+packages), [`brew`](https://github.com/ArchiveBox/homebrew-archivebox), or [`pip`](https://www.python.org/downloads/). It works on macOS, Windows, and Linux/BSD (both armv7 and amd64).
|
||||
ArchiveBox is a powerful self-hosted internet archiving solution written in Python 3. You feed it URLs of pages you want to archive, and it saves them to disk in a variety of formats depending on the configuration and the content it detects.
|
||||
|
||||
Once installed, URLs can be added via the command line `archivebox add` or the built-in Web UI `archivebox server`. It can ingest bookmarks from a service like Pocket/Pinboard, your entire browsing history, RSS feeds, or URLs one at a time.
|
||||
Your archive can be managed through the command line with commands like `archivebox add`, through the built-in Web UI `archivebox server`, or via the Python library API (beta). It can ingest bookmarks from a browser or service like Pocket/Pinboard, your entire browsing history, RSS feeds, or URLs one at a time. You can also schedule regular/realtime imports with `archivebox schedule`.
|
||||
|
||||
The main index is a self-contained `data/index.sqlite3` file, and each snapshot is stored as a folder `data/archive/<timestamp>/`, with an easy-to-read `index.html` and `index.json` within. For each page, ArchiveBox auto-extracts many types of assets/media and saves them in standard formats, with out-of-the-box support for: 3 types of HTML snapshots (wget, Chrome headless, singlefile), a PDF snapshot, a screenshot, a WARC archive, git repositories, images, audio, video, subtitles, article text, and more. The snapshots are browseable and manageable offline through the filesystem, the built-in webserver, or the Python API.
|
||||
The main index is a self-contained `index.sqlite3` file, and each snapshot is stored as a folder `data/archive/<timestamp>/`, with an easy-to-read `index.html` and `index.json` within. For each page, ArchiveBox auto-extracts many types of assets/media and saves them in standard formats, with out-of-the-box support for: several types of HTML snapshots (wget, Chrome headless, singlefile), PDF snapshotting, screenshotting, WARC archiving, git repositories, images, audio, video, subtitles, article text, and more. The snapshots are browseable and manageable offline through the filesystem, the built-in webserver, or the Python library API.
|
||||
|
||||
#### Quickstart
|
||||
### Quickstart
|
||||
|
||||
It works on Linux/BSD (Intel and ARM CPUs with `docker`/`apt`/`pip3`), macOS (with `docker`/`brew`/`pip3`), and Windows (beta with `docker`/`pip3`).
|
||||
|
||||
**First, get ArchiveBox using your system package manager, Docker, or pip:**
|
||||
```bash
|
||||
# You can run it with Docker or Docker Compose (recommended)
|
||||
docker pull archivebox/archivebox
|
||||
# https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/docker-compose.yml
|
||||
pip3 install archivebox
|
||||
archivebox --version
|
||||
# install extras as-needed, or use one of full setup methods below to get everything out-of-the-box
|
||||
|
||||
# or Ubuntu/Debian
|
||||
mkdir ~/archivebox && cd ~/archivebox # this can be anywhere
|
||||
archivebox init
|
||||
|
||||
archivebox add 'https://example.com'
|
||||
archivebox add --depth=1 'https://example.com'
|
||||
archivebox schedule --every=day https://getpocket.com/users/USERNAME/feed/all
|
||||
archivebox oneshot --extract=title,favicon,media https://www.youtube.com/watch?v=dQw4w9WgXcQ
|
||||
archivebox help # to see more options
|
||||
```
|
||||
|
||||
*(click to expand the sections below for full setup instructions)*
|
||||
|
||||
<details>
|
||||
<summary><b>Get ArchiveBox with <code>docker-compose</code> on any platform (recommended, everything included out-of-the-box)</b></summary>
|
||||
|
||||
First make sure you have Docker installed: https://docs.docker.com/get-docker/
|
||||
<br/><br/>
|
||||
This is the recommended way to run ArchiveBox because it includes *all* the extractors like chrome, wget, youtube-dl, git, etc., as well as full-text search with sonic, and many other great features.
|
||||
|
||||
```bash
|
||||
# create a new empty directory and initialize your collection (can be anywhere)
|
||||
mkdir ~/archivebox && cd ~/archivebox
|
||||
curl -O https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/docker-compose.yml
|
||||
docker-compose run archivebox init
|
||||
docker-compose run archivebox --version
|
||||
|
||||
# start the webserver and open the UI (optional)
|
||||
docker-compose run archivebox manage createsuperuser
|
||||
docker-compose up -d
|
||||
open http://127.0.0.1:8000
|
||||
|
||||
# you can also add links and manage your archive via the CLI:
|
||||
docker-compose run archivebox add 'https://example.com'
|
||||
docker-compose run archivebox status
|
||||
docker-compose run archivebox help # to see more options
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>Get ArchiveBox with <code>docker</code> on any platform</b></summary>
|
||||
|
||||
First make sure you have Docker installed: https://docs.docker.com/get-docker/<br/>
|
||||
```bash
|
||||
# create a new empty directory and initialize your collection (can be anywhere)
|
||||
mkdir ~/archivebox && cd ~/archivebox
|
||||
docker run -v $PWD:/data -it archivebox/archivebox init
|
||||
docker run -v $PWD:/data -it archivebox/archivebox --version
|
||||
|
||||
# start the webserver and open the UI (optional)
|
||||
docker run -v $PWD:/data -it archivebox/archivebox manage createsuperuser
|
||||
docker run -v $PWD:/data -p 8000:8000 archivebox/archivebox server 0.0.0.0:8000
|
||||
open http://127.0.0.1:8000
|
||||
|
||||
# you can also add links and manage your archive via the CLI:
|
||||
docker run -v $PWD:/data -it archivebox/archivebox add 'https://example.com'
|
||||
docker run -v $PWD:/data -it archivebox/archivebox status
|
||||
docker run -v $PWD:/data -it archivebox/archivebox help # to see more options
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>Get ArchiveBox with <code>apt</code> on Ubuntu >=20.04</b></summary>
|
||||
|
||||
```bash
|
||||
sudo add-apt-repository -u ppa:archivebox/archivebox
|
||||
apt install archivebox
|
||||
sudo apt install archivebox
|
||||
|
||||
# or macOS
|
||||
# create a new empty directory and initialize your collection (can be anywhere)
|
||||
mkdir ~/archivebox && cd ~/archivebox
|
||||
npm install --prefix . 'git+https://github.com/ArchiveBox/ArchiveBox.git'
|
||||
archivebox init
|
||||
archivebox --version
|
||||
|
||||
# start the webserver and open the web UI (optional)
|
||||
archivebox manage createsuperuser
|
||||
archivebox server 0.0.0.0:8000
|
||||
open http://127.0.0.1:8000
|
||||
|
||||
# you can also add URLs and manage the archive via the CLI and filesystem:
|
||||
archivebox add 'https://example.com'
|
||||
archivebox status
|
||||
archivebox list --html --with-headers > index.html
|
||||
archivebox list --json --with-headers > index.json
|
||||
archivebox help # to see more options
|
||||
```
|
||||
|
||||
For other Debian-based systems or older Ubuntu systems you can add these sources to `/etc/apt/sources.list`:
|
||||
```bash
|
||||
deb http://ppa.launchpad.net/archivebox/archivebox/ubuntu focal main
|
||||
deb-src http://ppa.launchpad.net/archivebox/archivebox/ubuntu focal main
|
||||
```
|
||||
(you may need to install some other dependencies manually however)
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>Get ArchiveBox with <code>brew</code> on macOS >=10.13</b></summary>
|
||||
|
||||
```bash
|
||||
brew install archivebox/archivebox/archivebox
|
||||
|
||||
# or for the Python version only, without wget/git/chrome/etc. included
|
||||
# create a new empty directory and initialize your collection (can be anywhere)
|
||||
mkdir ~/archivebox && cd ~/archivebox
|
||||
npm install --prefix . 'git+https://github.com/ArchiveBox/ArchiveBox.git'
|
||||
archivebox init
|
||||
archivebox --version
|
||||
|
||||
# start the webserver and open the web UI (optional)
|
||||
archivebox manage createsuperuser
|
||||
archivebox server 0.0.0.0:8000
|
||||
open http://127.0.0.1:8000
|
||||
|
||||
# you can also add URLs and manage the archive via the CLI and filesystem:
|
||||
archivebox add 'https://example.com'
|
||||
archivebox status
|
||||
archivebox list --html --with-headers > index.html
|
||||
archivebox list --json --with-headers > index.json
|
||||
archivebox help # to see more options
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>Get ArchiveBox with <code>pip</code> on any platform</b></summary>
|
||||
|
||||
```bash
|
||||
pip3 install archivebox
|
||||
|
||||
# If you're using an apt/brew/pip install you can run archivebox commands normally
|
||||
# archivebox [subcommand] [...args]
|
||||
# If you're using Docker you'll have to run the commands like this
|
||||
# docker run -v $PWD:/data -it archivebox/archivebox [subcommand] [...args]
|
||||
# And the equivalent in Docker Compose:
|
||||
# docker-compose run archivebox [subcommand] [...args]
|
||||
```
|
||||
|
||||
<small>Check that everything installed correctly with `archivebox --version`</small>
|
||||
|
||||
**To start using archivebox, you have to create a data folder and `cd` into it:**
|
||||
|
||||
```bash
|
||||
mkdir ~/archivebox && cd ~/archivebox # you can put the collection dir anywhere
|
||||
# create a new empty directory and initialize your collection (can be anywhere)
|
||||
mkdir ~/archivebox && cd ~/archivebox
|
||||
npm install --prefix . 'git+https://github.com/ArchiveBox/ArchiveBox.git'
|
||||
archivebox init
|
||||
archivebox --version
|
||||
# Install any missing extras like wget/git/chrome/etc. manually as needed
|
||||
|
||||
# start the webserver and open the web UI (optional)
|
||||
archivebox manage createsuperuser
|
||||
archivebox server 0.0.0.0:8000
|
||||
open http://127.0.0.1:8000
|
||||
|
||||
# you can also add URLs and manage the archive via the CLI and filesystem:
|
||||
archivebox add 'https://example.com'
|
||||
archivebox status
|
||||
archivebox list --html --with-headers > index.html
|
||||
archivebox list --json --with-headers > index.json
|
||||
archivebox help # to see more options
|
||||
```
|
||||
|
||||
**Then add some URLs to your archive collection:**
|
||||
```bash
|
||||
archivebox add https://github.com/ArchiveBox/ArchiveBox
|
||||
archivebox add --depth=1 https://example.com
|
||||
```
|
||||
|
||||
**View the snapshots of the URLs you added via the self-hosted web UI:**
|
||||
```bash
|
||||
archivebox manage createsuperuser # create an admin acct
|
||||
archivebox server 0.0.0.0:8000 # start the web server
|
||||
open http://127.0.0.1:8000/ # open the interactive admin panel
|
||||
ls ~/archivebox/archive/*/index.html # or browse the snapshots on disk
|
||||
```
|
||||
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
<div align="center">
|
||||
<img src="https://i.imgur.com/lUuicew.png" width="400px">
|
||||
<br/>
|
||||
|
@ -97,9 +210,9 @@ For more information, see the <a href="https://github.com/ArchiveBox/ArchiveBox/
|
|||
|
||||
ArchiveBox is a command line tool, self-hostable web-archiving server, and Python library all-in-one. It can be installed on Docker, macOS, Linux/BSD, and Windows. You can download and install it as a Debian/Ubuntu package, Homebrew package, Python3 package, or a Docker image. No matter which install method you choose, they all provide the same CLI, Web UI, and on-disk data format.
|
||||
|
||||
To use ArchiveBox you start by creating a folder for your data to live in (it can be anywhere on your system), and running `archivebox init` inside of it. That will create a sqlite3 index and an `ArchiveBox.conf` file. After that, you can continue to add/export/manage/etc using the CLI `archivebox help`, or you can run the Web UI (recommended).
|
||||
To use ArchiveBox you start by creating a folder for your data to live in (it can be anywhere on your system), and running `archivebox init` inside of it. That will create a sqlite3 index and an `ArchiveBox.conf` file. After that, you can continue to add/export/manage/etc using the CLI `archivebox help`, or you can run the Web UI (recommended). If you only want to archive a single site, you can run `archivebox oneshot` to avoid having to create a whole collection.
|
||||
|
||||
The CLI is considered "stable", the ArchiveBox Python API and REST APIs are in "beta", and the [desktop app](https://github.com/ArchiveBox/desktop) is in "alpha" stage.
|
||||
The CLI is considered "stable", the ArchiveBox Python API and REST APIs are "beta", and the [desktop app](https://github.com/ArchiveBox/desktop) is "alpha".
|
||||
|
||||
At the end of the day, the goal is to sleep soundly knowing that the part of the internet you care about will be automatically preserved in multiple, durable long-term formats that will be accessible for decades (or longer). You can also self-host your archivebox server on a public domain to provide archive.org-style public access to your site snapshots.
|
||||
|
||||
|
@ -146,7 +259,7 @@ archivebox add --depth=1 'https://news.ycombinator.com#2020-12-12'
|
|||
|
||||
See the [Usage: CLI](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#CLI-Usage) page for documentation and examples.
|
||||
|
||||
It also includes a built-in scheduled import feature and browser bookmarklet, so you can ingest URLs from RSS feeds, websites, or the filesystem regularly.
|
||||
It also includes a built-in scheduled import feature with `archivebox schedule` and browser bookmarklet, so you can pull in URLs from RSS feeds, websites, or the filesystem regularly/on-demand.
|
||||
|
||||
## Output formats
|
||||
|
||||
|
@ -161,11 +274,14 @@ The on-disk layout is optimized to be easy to browse by hand and durable long-te
|
|||
- **Index:** `index.html` & `index.json` HTML and JSON index files containing metadata and details
|
||||
- **Title:** `title` title of the site
|
||||
- **Favicon:** `favicon.ico` favicon of the site
|
||||
- **Headers:** `headers.json` Any HTTP headers the site returns are saved in a json file
|
||||
- **SingleFile:** `singlefile.html` HTML snapshot rendered with headless Chrome using SingleFile
|
||||
- **WGET Clone:** `example.com/page-name.html` wget clone of the site, with .html appended if not present
|
||||
- **WARC:** `warc/<timestamp>.gz` gzipped WARC of all the resources fetched while archiving
|
||||
- **PDF:** `output.pdf` Printed PDF of site using headless chrome
|
||||
- **Screenshot:** `screenshot.png` 1440x900 screenshot of site using headless chrome
|
||||
- **DOM Dump:** `output.html` DOM Dump of the HTML after rendering using headless chrome
|
||||
- **Readability:** `article.html/json` Article text extraction using Readability
|
||||
- **URL to Archive.org:** `archive.org.txt` A link to the saved site on archive.org
|
||||
- **Audio & Video:** `media/` all audio/video files + playlists, including subtitles & metadata with youtube-dl
|
||||
- **Source Code:** `git/` clone of any repository found on github, bitbucket, or gitlab links
|
||||
|
@ -191,8 +307,8 @@ archivebox add 'https://example.com/any/url/you/want/to/keep/secret/'
|
|||
|
||||
# without first disabling the extractors that share the URL with 3rd party APIs:
|
||||
archivebox config --set SAVE_ARCHIVE_DOT_ORG=False # disable saving all URLs in Archive.org
|
||||
archivebox config --set SAVE_FAVICON=False # optional: only the domain is leaked, not full URL
|
||||
archivebox config --get CHROME_VERSION # optional: set this to chromium instead of chrome if you don't like Google
|
||||
archivebox config --set SAVE_FAVICON=False # optional: only the domain is leaked, not full URL
|
||||
archivebox config --set CHROME_BINARY=chromium # optional: switch to chromium to avoid Chrome phoning home to Google
|
||||
```
|
||||
|
||||
Be aware that malicious archived JS can also read the contents of other pages in your archive due to snapshot CSRF and XSS protections being imperfect. See the [Security Overview](https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#stealth-mode) page for more details.
|
||||
|
@ -215,95 +331,6 @@ archivebox add 'https://example.com#2020-10-25'
|
|||
|
||||
---
|
||||
|
||||
# Setup
|
||||
|
||||
## Docker Compose
|
||||
|
||||
*This is the recommended way of running ArchiveBox.*
|
||||
|
||||
It comes with everything working out of the box, including all extractors,
|
||||
a headless browser runtime, a full webserver, and CLI interface.
|
||||
|
||||
```bash
|
||||
# docker-compose run archivebox <command> [args]
|
||||
|
||||
mkdir archivebox && cd archivebox
|
||||
wget 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/docker-compose.yml'
|
||||
docker-compose run archivebox init
|
||||
docker-compose run archivebox add 'https://example.com'
|
||||
docker-compose run archivebox manage createsuperuser
|
||||
docker-compose up
|
||||
open http://127.0.0.1:8000
|
||||
```
|
||||
|
||||
## Docker
|
||||
|
||||
```bash
|
||||
# docker run -v $PWD:/data -it archivebox/archivebox <command> [args]
|
||||
|
||||
mkdir archivebox && cd archivebox
|
||||
docker run -v $PWD:/data -it archivebox/archivebox init
|
||||
docker run -v $PWD:/data -it archivebox/archivebox add 'https://example.com'
|
||||
docker run -v $PWD:/data -it archivebox/archivebox manage createsuperuser
|
||||
|
||||
# run the webserver to access the web UI
|
||||
docker run -v $PWD:/data -it -p 8000:8000 archivebox/archivebox server 0.0.0.0:8000
|
||||
open http://127.0.0.1:8000
|
||||
|
||||
# or export a static version of the index if you don't want to run a server
|
||||
docker run -v $PWD:/data -it archivebox/archivebox list --html --with-headers > index.html
|
||||
docker run -v $PWD:/data -it archivebox/archivebox list --json --with-headers > index.json
|
||||
open ./index.html
|
||||
```
|
||||
|
||||
|
||||
## Bare Metal
|
||||
|
||||
```bash
|
||||
# archivebox <command> [args]
|
||||
|
||||
# on Debian/Ubuntu
|
||||
sudo add-apt-repository -u ppa:archivebox/archivebox
|
||||
apt install archivebox
|
||||
|
||||
# on macOS
|
||||
brew install archivebox/archivebox/archivebox
|
||||
```
|
||||
|
||||
Initialize your archive in a directory somewhere and add some links:
|
||||
```bash
|
||||
mkdir ~/archivebox && cd ~/archivebox
|
||||
npm install --prefix . 'git+https://github.com/ArchiveBox/ArchiveBox.git'
|
||||
archivebox init
|
||||
archivebox add 'https://example.com' # add URLs as args or pipe them in via stdin
|
||||
archivebox add --depth=1 https://example.com/table-of-contents.html
|
||||
# it can ingest links from many formats, including RSS/JSON/XML/MD/TXT and more
|
||||
curl https://getpocket.com/users/USERNAME/feed/all | archivebox add
|
||||
```
|
||||
|
||||
Start the webserver to access the web UI:
|
||||
```bash
|
||||
archivebox manage createsuperuser
|
||||
archivebox server 0.0.0.0:8000
|
||||
|
||||
open http://127.0.0.1:8000
|
||||
```
|
||||
|
||||
Or export a static HTML version of the index if you don't want to run a webserver:
|
||||
```bash
|
||||
archivebox list --html --with-headers > index.html
|
||||
archivebox list --json --with-headers > index.json
|
||||
open ./index.html
|
||||
```
|
||||
|
||||
To view more information about your dependencies, data, or the CLI:
|
||||
```bash
|
||||
archivebox version
|
||||
archivebox status
|
||||
archivebox help
|
||||
```
|
||||
---
|
||||
|
||||
<div align="center">
|
||||
<img src="https://i.imgur.com/PVO88AZ.png" width="80%"/>
|
||||
</div>
|
||||
|
@ -418,20 +445,19 @@ All contributions to ArchiveBox are welcomed! Check our [issues](https://github.
|
|||
First, install the system dependencies from the "Bare Metal" section above.
|
||||
Then you can clone the ArchiveBox repo and install
|
||||
```bash
|
||||
git clone https://github.com/ArchiveBox/ArchiveBox
|
||||
cd ArchiveBox
|
||||
git clone https://github.com/ArchiveBox/ArchiveBox && cd ArchiveBox
|
||||
git checkout master # or the branch you want to test
|
||||
git pull
|
||||
git submodule update --init --recursive
|
||||
git pull --recurse-submodules
|
||||
|
||||
# Install ArchiveBox + python dependencies
|
||||
python3 -m venv .venv && source .venv/bin/activate && pip install -e .[dev]
|
||||
# or
|
||||
pipenv install --dev && pipenv shell
|
||||
# or with pipenv: pipenv install --dev && pipenv shell
|
||||
|
||||
# Install node dependencies
|
||||
npm install
|
||||
|
||||
# Optional: install the extractor dependencies
|
||||
# Optional: install extractor dependencies manually or with helper script
|
||||
./bin/setup.sh
|
||||
|
||||
# Optional: develop via docker by mounting the code dir into the container
|
||||
|
@ -463,6 +489,17 @@ You can also run all these in Docker. For more examples see the Github Actions C
|
|||
```
|
||||
(uses `pytest -s`)
|
||||
|
||||
#### Make migrations or enter a django shell
|
||||
|
||||
```bash
|
||||
cd archivebox/
|
||||
./manage.py makemigrations
|
||||
|
||||
cd data/
|
||||
archivebox shell
|
||||
```
|
||||
(uses Django's `./manage.py` and `archivebox shell`)
|
||||
|
||||
#### Build the docs, pip package, and docker image
|
||||
|
||||
```bash
|
||||
|
@ -471,6 +508,8 @@ You can also run all these in Docker. For more examples see the Github Actions C
|
|||
# or individually:
|
||||
./bin/build_docs.sh
|
||||
./bin/build_pip.sh
|
||||
./bin/build_deb.sh
|
||||
./bin/build_brew.sh
|
||||
./bin/build_docker.sh
|
||||
```
|
||||
|
||||
|
|
|
@ -1,541 +0,0 @@
|
|||
Metadata-Version: 2.1
|
||||
Name: archivebox
|
||||
Version: 0.4.24
|
||||
Summary: The self-hosted internet archive.
|
||||
Home-page: https://github.com/ArchiveBox/ArchiveBox
|
||||
Author: Nick Sweeting
|
||||
Author-email: git@nicksweeting.com
|
||||
License: MIT
|
||||
Project-URL: Source, https://github.com/ArchiveBox/ArchiveBox
|
||||
Project-URL: Documentation, https://github.com/ArchiveBox/ArchiveBox/wiki
|
||||
Project-URL: Bug Tracker, https://github.com/ArchiveBox/ArchiveBox/issues
|
||||
Project-URL: Changelog, https://github.com/ArchiveBox/ArchiveBox/wiki/Changelog
|
||||
Project-URL: Roadmap, https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap
|
||||
Project-URL: Community, https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community
|
||||
Project-URL: Donate, https://github.com/ArchiveBox/ArchiveBox/wiki/Donations
|
||||
Description: <div align="center">
|
||||
<em><img src="https://i.imgur.com/5B48E3N.png" height="90px"></em>
|
||||
<h1>ArchiveBox<br/><sub>The open-source self-hosted web archive.</sub></h1>
|
||||
|
||||
▶️ <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart">Quickstart</a> |
|
||||
<a href="https://archivebox.zervice.io/">Demo</a> |
|
||||
<a href="https://github.com/ArchiveBox/ArchiveBox">Github</a> |
|
||||
<a href="https://github.com/ArchiveBox/ArchiveBox/wiki">Documentation</a> |
|
||||
<a href="#background--motivation">Info & Motivation</a> |
|
||||
<a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community">Community</a> |
|
||||
<a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap">Roadmap</a>
|
||||
|
||||
<pre>
|
||||
"Your own personal internet archive" (网站存档 / 爬虫)
|
||||
</pre>
|
||||
|
||||
<!--<a href="http://webchat.freenode.net?channels=ArchiveBox&uio=d4"><img src="https://img.shields.io/badge/Community_chat-IRC-%2328A745.svg"/></a>-->
|
||||
|
||||
<a href="https://github.com/ArchiveBox/ArchiveBox/blob/master/LICENSE"><img src="https://img.shields.io/badge/Open_source-MIT-green.svg?logo=git&logoColor=green"/></a>
|
||||
<a href="https://github.com/ArchiveBox/ArchiveBox/commits/dev"><img src="https://img.shields.io/github/last-commit/ArchiveBox/ArchiveBox.svg?logo=Sublime+Text&logoColor=green&label=Active"/></a>
|
||||
<a href="https://github.com/ArchiveBox/ArchiveBox"><img src="https://img.shields.io/github/stars/ArchiveBox/ArchiveBox.svg?logo=github&label=Stars&logoColor=blue"/></a>
|
||||
<a href="https://test.pypi.org/project/archivebox/"><img src="https://img.shields.io/badge/Python-%3E%3D3.7-yellow.svg?logo=python&logoColor=yellow"/></a>
|
||||
<a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Install#dependencies"><img src="https://img.shields.io/badge/Chromium-%3E%3D59-orange.svg?logo=Google+Chrome&logoColor=orange"/></a>
|
||||
<a href="https://hub.docker.com/r/archivebox/archivebox"><img src="https://img.shields.io/badge/Docker-all%20platforms-lightblue.svg?logo=docker&logoColor=lightblue"/></a>
|
||||
|
||||
<hr/>
|
||||
</div>
|
||||
|
||||
ArchiveBox is a powerful self-hosted internet archiving solution written in Python 3. You feed it URLs of pages you want to archive, and it saves them to disk in a variety of formats depending on the configuration and the content it detects. ArchiveBox can be installed via [Docker](https://docs.docker.com/get-docker/) (recommended) or [`pip`](https://www.python.org/downloads/). It works on macOS, Windows, and Linux/BSD (both armv7 and amd64).
|
||||
|
||||
Once installed, URLs can be added via the command line `archivebox add` or the built-in Web UI `archivebox server`. It can ingest bookmarks from a service like Pocket/Pinboard, your entire browsing history, RSS feeds, or URLs one at a time.
|
||||
|
||||
The main index is a self-contained `data/index.sqlite3` file, and each snapshot is stored as a folder `data/archive/<timestamp>/`, with an easy-to-read `index.html` and `index.json` within. For each page, ArchiveBox auto-extracts many types of assets/media and saves them in standard formats, with out-of-the-box support for: 3 types of HTML snapshots (wget, Chrome headless, singlefile), a PDF snapshot, a screenshot, a WARC archive, git repositories, images, audio, video, subtitles, article text, and more. The snapshots are browseable and manageable offline through the filesystem, the built-in webserver, or the Python API.
|
||||
|
||||
|
||||
#### Quickstart
|
||||
|
||||
```bash
|
||||
# 1. Create a folder somewhere to hold your ArchiveBox data
|
||||
mkdir ~/archivebox && cd ~/archivebox
|
||||
docker run -v $PWD:/data -it archivebox/archivebox init
|
||||
|
||||
# 2. Archive some URLs to get started
|
||||
docker run -v $PWD:/data -t archivebox/archivebox add https://github.com/ArchiveBox/ArchiveBox
|
||||
docker run -v $PWD:/data -t archivebox/archivebox add --depth=1 https://example.com
|
||||
|
||||
# 3. Then view the snapshots of the URLs you added via the self-hosted web UI
|
||||
docker run -v $PWD:/data -it archivebox/archivebox manage createsuperuser # create an admin acct
|
||||
docker run -v $PWD:/data -p 8000:8000 archivebox/archivebox # start the web server
|
||||
open http://127.0.0.1:8000/ # open the interactive admin panel
|
||||
ls archive/*/index.html # or just browse snapshots on disk
|
||||
```
|
||||
|
||||
<div align="center">
|
||||
<img src="https://i.imgur.com/lUuicew.png" width="400px">
|
||||
<br/>
|
||||
|
||||
<a href="https://archivebox.zervice.io">DEMO: archivebox.zervice.io/</a>
|
||||
For more information, see the <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart">full Quickstart guide</a>, <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Usage">Usage</a>, and <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration">Configuration</a> docs.
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
|
||||
# Overview
|
||||
|
||||
ArchiveBox is a command line tool, self-hostable web-archiving server, and Python library all-in-one. It's available as a Python3 package or a Docker image; both methods provide the same CLI, Web UI, and on-disk data format.
|
||||
|
||||
It works on Docker, macOS, and Linux/BSD. Windows is not officially supported, but users have reported getting it working using WSL2 + Docker.
|
||||
|
||||
To use ArchiveBox you start by creating a folder for your data to live in (it can be anywhere on your system), and running `archivebox init` inside of it. That will create a sqlite3 index and an `ArchiveBox.conf` file. After that, you can continue to add/remove/search/import/export/manage/config/etc using the CLI `archivebox help`, or you can run the Web UI (recommended):
|
||||
```bash
|
||||
archivebox manage createsuperuser
|
||||
archivebox server 0.0.0.0:8000
|
||||
open http://127.0.0.1:8000
|
||||
```
|
||||
|
||||
The CLI is considered "stable", the ArchiveBox Python API and REST APIs are in "beta", and the [desktop app](https://github.com/ArchiveBox/desktop) is in "alpha" stage.
|
||||
|
||||
At the end of the day, the goal is to sleep soundly knowing that the part of the internet you care about will be automatically preserved in multiple, durable long-term formats that will be accessible for decades (or longer). You can also self-host your archivebox server on a public domain to provide archive.org-style public access to your site snapshots.
|
||||
|
||||
<div align="center">
|
||||
<img src="https://i.imgur.com/3tBL7PU.png" width="22%" alt="CLI Screenshot" align="top">
|
||||
<img src="https://i.imgur.com/viklZNG.png" width="22%" alt="Desktop index screenshot" align="top">
|
||||
<img src="https://i.imgur.com/RefWsXB.jpg" width="22%" alt="Desktop details page Screenshot"/>
|
||||
<img src="https://i.imgur.com/M6HhzVx.png" width="22%" alt="Desktop details page Screenshot"/><br/>
|
||||
<sup><a href="https://archive.sweeting.me/">Demo</a> | <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Usage">Usage</a> | <a href="#screenshots">Screenshots</a></sup>
|
||||
<br/>
|
||||
<sub>. . . . . . . . . . . . . . . . . . . . . . . . . . . .</sub>
|
||||
</div><br/>
|
||||
|
||||
|
||||
## Key Features
|
||||
|
||||
- [**Free & open source**](https://github.com/ArchiveBox/ArchiveBox/blob/master/LICENSE), doesn't require signing up for anything, stores all data locally
|
||||
- [**Few dependencies**](https://github.com/ArchiveBox/ArchiveBox/wiki/Install#dependencies) and [simple command line interface](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#CLI-Usage)
|
||||
- [**Comprehensive documentation**](https://github.com/ArchiveBox/ArchiveBox/wiki), [active development](https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap), and [rich community](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community)
|
||||
- Easy to set up **[scheduled importing](https://github.com/ArchiveBox/ArchiveBox/wiki/Scheduled-Archiving) from multiple sources**
|
||||
- Uses common, **durable, [long-term formats](#saves-lots-of-useful-stuff-for-each-imported-link)** like HTML, JSON, PDF, PNG, and WARC
|
||||
- ~~**Suitable for paywalled / [authenticated content](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#chrome_user_data_dir)** (can use your cookies)~~ (do not do this until v0.5 is released with some security fixes)
|
||||
- **Doesn't require a constantly-running daemon**, proxy, or native app
|
||||
- Provides a CLI, Python API, self-hosted web UI, and REST API (WIP)
|
||||
- Architected to be able to run [**many varieties of scripts during archiving**](https://github.com/ArchiveBox/ArchiveBox/issues/51), e.g. to extract media, summarize articles, [scroll pages](https://github.com/ArchiveBox/ArchiveBox/issues/80), [close modals](https://github.com/ArchiveBox/ArchiveBox/issues/175), expand comment threads, etc.
|
||||
- Can also [**mirror content to 3rd-party archiving services**](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#submit_archive_dot_org) automatically for redundancy
|
||||
|
||||
## Input formats
|
||||
|
||||
ArchiveBox supports many input formats for URLs, including Pocket & Pinboard exports, Browser bookmarks, Browser history, plain text, HTML, markdown, and more!
|
||||
|
||||
```bash
|
||||
echo 'http://example.com' | archivebox add
|
||||
archivebox add 'https://example.com/some/page'
|
||||
archivebox add < ~/Downloads/firefox_bookmarks_export.html
|
||||
archivebox add < any_text_with_urls_in_it.txt
|
||||
archivebox add --depth=1 'https://example.com/some/downloads.html'
|
||||
archivebox add --depth=1 'https://news.ycombinator.com#2020-12-12'
|
||||
```
|
||||
|
||||
- <img src="https://nicksweeting.com/images/bookmarks.png" height="22px"/> Browser history or bookmarks exports (Chrome, Firefox, Safari, IE, Opera, and more)
|
||||
- <img src="https://nicksweeting.com/images/rss.svg" height="22px"/> RSS, XML, JSON, CSV, SQL, HTML, Markdown, TXT, or any other text-based format
|
||||
- <img src="https://getpocket.com/favicon.ico" height="22px"/> Pocket, Pinboard, Instapaper, Shaarli, Delicious, Reddit Saved Posts, Wallabag, Unmark.it, OneTab, and more
|
||||
|
||||
See the [Usage: CLI](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#CLI-Usage) page for documentation and examples.
|
||||
|
||||
It also includes a built-in scheduled import feature and browser bookmarklet, so you can ingest URLs from RSS feeds, websites, or the filesystem regularly.
|
||||
|
||||
## Output formats
|
||||
|
||||
All of ArchiveBox's state (including the index, snapshot data, and config file) is stored in a single folder called the "ArchiveBox data folder". All `archivebox` CLI commands must be run from inside this folder, and you first create it by running `archivebox init`.
|
||||
|
||||
The on-disk layout is optimized to be easy to browse by hand and durable long-term. The main index is a standard sqlite3 database (it can also be exported as static JSON/HTML), and the archive snapshots are organized by date-added timestamp in the `archive/` subfolder. Each snapshot subfolder includes a static JSON and HTML index describing its contents, and the snapshot extractor outputs are plain files within the folder (e.g. `media/example.mp4`, `git/somerepo.git`, `static/someimage.png`, etc.).
|
||||
|
||||
```bash
|
||||
ls ./archive/<timestamp>/
|
||||
```
|
||||
|
||||
- **Index:** `index.html` & `index.json` HTML and JSON index files containing metadata and details
|
||||
- **Title:** `title` title of the site
|
||||
- **Favicon:** `favicon.ico` favicon of the site
|
||||
- **WGET Clone:** `example.com/page-name.html` wget clone of the site, with .html appended if not present
|
||||
- **WARC:** `warc/<timestamp>.gz` gzipped WARC of all the resources fetched while archiving
|
||||
- **PDF:** `output.pdf` Printed PDF of site using headless chrome
|
||||
- **Screenshot:** `screenshot.png` 1440x900 screenshot of site using headless chrome
|
||||
- **DOM Dump:** `output.html` DOM Dump of the HTML after rendering using headless chrome
|
||||
- **URL to Archive.org:** `archive.org.txt` A link to the saved site on archive.org
|
||||
- **Audio & Video:** `media/` all audio/video files + playlists, including subtitles & metadata with youtube-dl
|
||||
- **Source Code:** `git/` clone of any repository found on github, bitbucket, or gitlab links
|
||||
- _More coming soon! See the [Roadmap](https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap)..._
|
||||
|
||||
It does everything out-of-the-box by default, but you can disable or tweak [individual archive methods](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration) via environment variables or config file.
|
||||
|
||||
## Dependencies
|
||||
|
||||
You don't need to install all the dependencies, ArchiveBox will automatically enable the relevant modules based on whatever you have available, but it's recommended to use the official [Docker image](https://github.com/ArchiveBox/ArchiveBox/wiki/Docker) with everything preinstalled.
|
||||
|
||||
If you so choose, you can also install ArchiveBox and its dependencies directly on any Linux or macOS systems using the [automated setup script](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart) or the [system package manager](https://github.com/ArchiveBox/ArchiveBox/wiki/Install).
|
||||
|
||||
ArchiveBox is written in Python 3 so it requires `python3` and `pip3` available on your system. It also uses a set of optional, but highly recommended external dependencies for archiving sites: `wget` (for plain HTML, static files, and WARC saving), `chromium` (for screenshots, PDFs, JS execution, and more), `youtube-dl` (for audio and video), `git` (for cloning git repos), and `nodejs` (for readability and singlefile), and more.
|
||||
|
||||
## Caveats
|
||||
|
||||
If you're importing URLs containing secret slugs or pages with private content (e.g. Google Docs, CodiMD notepads, etc.), you may want to disable some of the extractor modules to avoid leaking private URLs to 3rd party APIs during the archiving process.
|
||||
```bash
|
||||
# don't do this:
|
||||
archivebox add 'https://docs.google.com/document/d/12345somelongsecrethere'
|
||||
archivebox add 'https://example.com/any/url/you/want/to/keep/secret/'
|
||||
|
||||
# without first disabling the extractors that share the URL with 3rd party APIs:
|
||||
archivebox config --set SAVE_ARCHIVE_DOT_ORG=False # disable saving all URLs in Archive.org
|
||||
archivebox config --set SAVE_FAVICON=False # optional: only the domain is leaked, not full URL
|
||||
archivebox config --get CHROME_VERSION # optional: set this to chromium instead of chrome if you don't like Google
|
||||
```
|
||||
|
||||
Be aware that malicious archived JS can also read the contents of other pages in your archive due to snapshot CSRF and XSS protections being imperfect. See the [Security Overview](https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#stealth-mode) page for more details.
|
||||
```bash
|
||||
# visiting an archived page with malicious JS:
|
||||
https://127.0.0.1:8000/archive/1602401954/example.com/index.html
|
||||
|
||||
# example.com/index.js can now make a request to read everything:
|
||||
https://127.0.0.1:8000/index.html
|
||||
https://127.0.0.1:8000/archive/*
|
||||
# then example.com/index.js can send it off to some evil server
|
||||
```
|
||||
|
||||
Support for saving multiple snapshots of each site over time will be [added soon](https://github.com/ArchiveBox/ArchiveBox/issues/179) (along with the ability to view diffs of the changes between runs). For now ArchiveBox is designed to only archive each URL with each extractor type once. A workaround to take multiple snapshots of the same URL is to make them slightly different by adding a hash:
|
||||
```bash
|
||||
archivebox add 'https://example.com#2020-10-24'
|
||||
...
|
||||
archivebox add 'https://example.com#2020-10-25'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
# Setup
|
||||
|
||||
## Docker Compose
|
||||
|
||||
*This is the recommended way of running ArchiveBox.*
|
||||
|
||||
It comes with everything working out of the box, including all extractors,
|
||||
a headless browser runtime, a full webserver, and CLI interface.
|
||||
|
||||
```bash
|
||||
# docker-compose run archivebox <command> [args]
|
||||
|
||||
mkdir archivebox && cd archivebox
|
||||
wget 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/docker-compose.yml'
|
||||
docker-compose run archivebox init
|
||||
docker-compose run archivebox add 'https://example.com'
|
||||
docker-compose run archivebox manage createsuperuser
|
||||
docker-compose up
|
||||
open http://127.0.0.1:8000
|
||||
```
|
||||
|
||||
## Docker
|
||||
|
||||
```bash
|
||||
# docker run -v $PWD:/data -it archivebox/archivebox <command> [args]
|
||||
|
||||
mkdir archivebox && cd archivebox
|
||||
docker run -v $PWD:/data -it archivebox/archivebox init
|
||||
docker run -v $PWD:/data -it archivebox/archivebox add 'https://example.com'
|
||||
docker run -v $PWD:/data -it archivebox/archivebox manage createsuperuser
|
||||
|
||||
# run the webserver to access the web UI
|
||||
docker run -v $PWD:/data -it -p 8000:8000 archivebox/archivebox server 0.0.0.0:8000
|
||||
open http://127.0.0.1:8000
|
||||
|
||||
# or export a static version of the index if you don't want to run a server
|
||||
docker run -v $PWD:/data -it archivebox/archivebox list --html --with-headers > index.html
|
||||
docker run -v $PWD:/data -it archivebox/archivebox list --json --with-headers > index.json
|
||||
open ./index.html
|
||||
```
|
||||
|
||||
|
||||
## Bare Metal
|
||||
|
||||
```bash
|
||||
# archivebox <command> [args]
|
||||
```
|
||||
|
||||
First install the system, pip, and npm dependencies:
|
||||
```bash
|
||||
# Install main dependencies using apt on Ubuntu/Debian, brew on mac, or pkg on BSD
|
||||
apt install python3 python3-pip python3-dev git curl wget chromium-browser youtube-dl
|
||||
|
||||
# Install Node runtime (used for headless browser scripts like Readability, Singlefile, Mercury, etc.)
|
||||
curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add - \
|
||||
&& echo 'deb https://deb.nodesource.com/node_14.x $(lsb_release -cs) main' >> /etc/apt/sources.list \
|
||||
&& apt-get update \
|
||||
&& apt-get install --no-install-recommends nodejs
|
||||
|
||||
# Make a directory to hold your collection
|
||||
mkdir archivebox && cd archivebox # (can be anywhere, doesn't have to be called archivebox)
|
||||
|
||||
# Install the archivebox python package in ./.venv
|
||||
python3 -m venv .venv && source .venv/bin/activate
|
||||
pip install --upgrade archivebox
|
||||
|
||||
# Install node packages in ./node_modules (used for SingleFile, Readability, and Puppeteer)
|
||||
npm install --prefix . 'git+https://github.com/ArchiveBox/ArchiveBox.git'
|
||||
```
|
||||
|
||||
Initialize your archive and add some links:
|
||||
```bash
|
||||
archivebox init
|
||||
archivebox add 'https://example.com' # add URLs as args or pipe them in via stdin
|
||||
archivebox add --depth=1 https://example.com/table-of-contents.html
|
||||
# it can ingest links from many formats, including RSS/JSON/XML/MD/TXT and more
|
||||
curl https://getpocket.com/users/USERNAME/feed/all | archivebox add
|
||||
```
|
||||
|
||||
Start the webserver to access the web UI:
|
||||
```bash
|
||||
archivebox manage createsuperuser
|
||||
archivebox server 0.0.0.0:8000
|
||||
|
||||
open http://127.0.0.1:8000
|
||||
```
|
||||
|
||||
Or export a static HTML version of the index if you don't want to run a webserver:
|
||||
```bash
|
||||
archivebox list --html --with-headers > index.html
|
||||
archivebox list --json --with-headers > index.json
|
||||
open ./index.html
|
||||
```
|
||||
|
||||
To view more information about your dependencies, data, or the CLI:
|
||||
```bash
|
||||
archivebox version
|
||||
archivebox status
|
||||
archivebox help
|
||||
```
|
||||
---
|
||||
|
||||
<div align="center">
|
||||
<img src="https://i.imgur.com/PVO88AZ.png" width="80%"/>
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
# Background & Motivation
|
||||
|
||||
Vast treasure troves of knowledge are lost every day on the internet to link rot. As a society, we have an imperative to preserve some important parts of that treasure, just like we preserve our books, paintings, and music in physical libraries long after the originals go out of print or fade into obscurity.
|
||||
|
||||
Whether it's to resist censorship by saving articles before they get taken down or edited, or
|
||||
just to save a collection of early 2010's flash games you love to play, having the tools to
|
||||
archive internet content enables you to save the stuff you care most about before it disappears.
|
||||
|
||||
<div align="center">
|
||||
<img src="https://i.imgur.com/bC6eZcV.png" width="50%"/><br/>
|
||||
<sup><i>Image from <a href="https://digiday.com/media/wtf-link-rot/">WTF is Link Rot?</a>...</i><br/></sup>
|
||||
</div>
|
||||
|
||||
The balance between the permanence and ephemeral nature of content on the internet is part of what makes it beautiful.
|
||||
I don't think everything should be preserved in an automated fashion, making all content permanent and never removable, but I do think people should be able to decide for themselves and effectively archive specific content that they care about.
|
||||
|
||||
Because modern websites are complicated and often rely on dynamic content,
|
||||
ArchiveBox archives the sites in **several different formats** beyond what public archiving services like Archive.org and Archive.is are capable of saving. Using multiple methods and the market-dominant browser to execute JS ensures we can save even the most complex, finicky websites in at least a few high-quality, long-term data formats.
|
||||
|
||||
All the archived links are stored by date bookmarked in `./archive/<timestamp>`, and everything is indexed nicely with JSON & HTML files. The intent is for all the content to be viewable with common software in 50 - 100 years without needing to run ArchiveBox in a VM.
|
||||
|
||||
## Comparison to Other Projects
|
||||
|
||||
▶ **Check out our [community page](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community) for an index of web archiving initiatives and projects.**
|
||||
|
||||
<img src="https://i.imgur.com/4nkFjdv.png" width="10%" align="left" alt="comparison"/> The aim of ArchiveBox is to go beyond what the Wayback Machine and other public archiving services can do, by adding a headless browser to replay sessions accurately, and by automatically extracting all the content in multiple redundant formats that will survive being passed down to historians and archivists through many generations.
|
||||
|
||||
#### User Interface & Intended Purpose
|
||||
|
||||
ArchiveBox differentiates itself from [similar projects](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community#Web-Archiving-Projects) by being a simple, one-shot CLI interface for users to ingest bulk feeds of URLs over extended periods, as opposed to being a backend service that ingests individual, manually-submitted URLs from a web UI. However, we also have the option to add urls via a web interface through our Django frontend.
|
||||
|
||||
#### Private Local Archives vs Centralized Public Archives
|
||||
|
||||
Unlike crawler software that starts from a seed URL and works outwards, or public tools like Archive.org designed for users to manually submit links from the public internet, ArchiveBox tries to be a set-and-forget archiver suitable for archiving your entire browsing history, RSS feeds, or bookmarks, ~~including private/authenticated content that you wouldn't otherwise share with a centralized service~~ (do not do this until v0.5 is released with some security fixes). Also by having each user store their own content locally, we can save much larger portions of everyone's browsing history than a shared centralized service would be able to handle.
|
||||
|
||||
#### Storage Requirements
|
||||
|
||||
Because ArchiveBox is designed to ingest a firehose of browser history and bookmark feeds to a local disk, it can be much more disk-space intensive than a centralized service like the Internet Archive or Archive.today. However, as storage space gets cheaper and compression improves, you should be able to use it continuously over the years without having to delete anything. In my experience, ArchiveBox uses about 5GB per 1000 articles, but your mileage may vary depending on which options you have enabled and what types of sites you're archiving. By default, it archives everything in as many formats as possible, meaning it takes more space than using a single method, but more content is accurately replayable over extended periods of time. Storage requirements can be reduced by using a compressed/deduplicated filesystem like ZFS/BTRFS, or by setting `SAVE_MEDIA=False` to skip audio & video files.
|
||||
|
||||
## Learn more
|
||||
|
||||
Whether you want to learn which organizations are the big players in the web archiving space, want to find a specific open-source tool for your web archiving need, or just want to see where archivists hang out online, our Community Wiki page serves as an index of the broader web archiving community. Check it out to learn about some of the coolest web archiving projects and communities on the web!
|
||||
|
||||
<img src="https://i.imgur.com/0ZOmOvN.png" width="14%" align="right"/>
|
||||
|
||||
- [Community Wiki](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community)
|
||||
- [The Master Lists](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community#The-Master-Lists)
|
||||
_Community-maintained indexes of archiving tools and institutions._
|
||||
- [Web Archiving Software](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community#Web-Archiving-Projects)
|
||||
_Open source tools and projects in the internet archiving space._
|
||||
- [Reading List](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community#Reading-List)
|
||||
_Articles, posts, and blogs relevant to ArchiveBox and web archiving in general._
|
||||
- [Communities](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community#Communities)
|
||||
_A collection of the most active internet archiving communities and initiatives._
|
||||
- Check out the ArchiveBox [Roadmap](https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap) and [Changelog](https://github.com/ArchiveBox/ArchiveBox/wiki/Changelog)
|
||||
- Learn why archiving the internet is important by reading the "[On the Importance of Web Archiving](https://parameters.ssrc.org/2018/09/on-the-importance-of-web-archiving/)" blog post.
|
||||
- Or reach out to me for questions and comments via [@ArchiveBoxApp](https://twitter.com/ArchiveBoxApp) or [@theSquashSH](https://twitter.com/thesquashSH) on Twitter.
|
||||
|
||||
---
|
||||
|
||||
# Documentation
|
||||
|
||||
<img src="https://read-the-docs-guidelines.readthedocs-hosted.com/_images/logo-dark.png" width="13%" align="right"/>
|
||||
|
||||
We use the [Github wiki system](https://github.com/ArchiveBox/ArchiveBox/wiki) and [Read the Docs](https://archivebox.readthedocs.io/en/latest/) (WIP) for documentation.
|
||||
|
||||
You can also access the docs locally by looking in the [`ArchiveBox/docs/`](https://github.com/ArchiveBox/ArchiveBox/wiki/Home) folder.
|
||||
|
||||
## Getting Started
|
||||
|
||||
- [Quickstart](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart)
|
||||
- [Install](https://github.com/ArchiveBox/ArchiveBox/wiki/Install)
|
||||
- [Docker](https://github.com/ArchiveBox/ArchiveBox/wiki/Docker)
|
||||
|
||||
## Reference
|
||||
|
||||
- [Usage](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage)
|
||||
- [Configuration](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration)
|
||||
- [Supported Sources](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart#2-get-your-list-of-urls-to-archive)
|
||||
- [Supported Outputs](https://github.com/ArchiveBox/ArchiveBox/wiki#can-save-these-things-for-each-site)
|
||||
- [Scheduled Archiving](https://github.com/ArchiveBox/ArchiveBox/wiki/Scheduled-Archiving)
|
||||
- [Publishing Your Archive](https://github.com/ArchiveBox/ArchiveBox/wiki/Publishing-Your-Archive)
|
||||
- [Chromium Install](https://github.com/ArchiveBox/ArchiveBox/wiki/Install-Chromium)
|
||||
- [Security Overview](https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview)
|
||||
- [Troubleshooting](https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting)
|
||||
- [Python API](https://docs.archivebox.io/en/latest/modules.html)
|
||||
- REST API (coming soon...)
|
||||
|
||||
## More Info
|
||||
|
||||
- [Tickets](https://github.com/ArchiveBox/ArchiveBox/issues)
|
||||
- [Roadmap](https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap)
|
||||
- [Changelog](https://github.com/ArchiveBox/ArchiveBox/wiki/Changelog)
|
||||
- [Donations](https://github.com/ArchiveBox/ArchiveBox/wiki/Donations)
|
||||
- [Background & Motivation](https://github.com/ArchiveBox/ArchiveBox#background--motivation)
|
||||
- [Web Archiving Community](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community)
|
||||
|
||||
---
|
||||
|
||||
# ArchiveBox Development
|
||||
|
||||
All contributions to ArchiveBox are welcomed! Check our [issues](https://github.com/ArchiveBox/ArchiveBox/issues) and [Roadmap](https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap) for things to work on, and please open an issue to discuss your proposed implementation before working on things! Otherwise we may have to close your PR if it doesn't align with our roadmap.
|
||||
|
||||
### Setup the dev environment
|
||||
|
||||
First, install the system dependencies from the "Bare Metal" section above.
|
||||
Then you can clone the ArchiveBox repo and install its dependencies:
|
||||
```bash
|
||||
git clone https://github.com/ArchiveBox/ArchiveBox
|
||||
cd ArchiveBox
|
||||
git checkout master # or the branch you want to test
|
||||
git pull
|
||||
|
||||
# Install ArchiveBox + python dependencies
|
||||
python3 -m venv .venv && source .venv/bin/activate && pip install -e .[dev]
|
||||
# or
|
||||
pipenv install --dev && pipenv shell
|
||||
|
||||
# Install node dependencies
|
||||
npm install
|
||||
|
||||
# Optional: install the extractor dependencies
|
||||
./bin/setup.sh
|
||||
|
||||
# Optional: develop via docker by mounting the code dir into the container
|
||||
# if you edit e.g. ./archivebox/core/models.py on the docker host, runserver
|
||||
# inside the container will reload and pick up your changes
|
||||
docker build . -t archivebox
|
||||
docker run -it -p 8000:8000 \
|
||||
-v $PWD/data:/data \
|
||||
-v $PWD/archivebox:/app/archivebox \
|
||||
archivebox server 0.0.0.0:8000 --debug --reload
|
||||
```
|
||||
|
||||
### Common development tasks
|
||||
|
||||
See the `./bin/` folder and read the source of the bash scripts within.
|
||||
You can also run all these in Docker. For more examples see the Github Actions CI/CD tests that are run: `.github/workflows/*.yml`.
|
||||
|
||||
#### Run the linters
|
||||
|
||||
```bash
|
||||
./bin/lint.sh
|
||||
```
|
||||
(uses `flake8` and `mypy`)
|
||||
|
||||
#### Run the integration tests
|
||||
|
||||
```bash
|
||||
./bin/test.sh
|
||||
```
|
||||
(uses `pytest -s`)
|
||||
|
||||
#### Build the docs, pip package, and docker image
|
||||
|
||||
```bash
|
||||
./bin/build.sh
|
||||
|
||||
# or individually:
|
||||
./bin/build_docs.sh
|
||||
./bin/build_pip.sh
|
||||
./bin/build_docker.sh
|
||||
```
|
||||
|
||||
#### Roll a release
|
||||
|
||||
```bash
|
||||
./bin/release.sh
|
||||
```
|
||||
(bumps the version, builds, and pushes a release to PyPI, Docker Hub, and Github Packages)
|
||||
|
||||
|
||||
---
|
||||
|
||||
<div align="center">
|
||||
<br/><br/>
|
||||
<img src="https://raw.githubusercontent.com/Monadical-SAS/redux-time/HEAD/examples/static/jeremy.jpg" height="40px"/>
|
||||
<br/>
|
||||
<sub><i>This project is maintained mostly in <a href="https://nicksweeting.com/blog#About">my spare time</a> with help from generous contributors and Monadical.com.</i></sub>
|
||||
<br/><br/>
|
||||
|
||||
<br/>
|
||||
<a href="https://github.com/sponsors/pirate">Sponsor us on Github</a>
|
||||
<br>
|
||||
<br>
|
||||
<a href="https://www.patreon.com/theSquashSH"><img src="https://img.shields.io/badge/Donate_to_support_development-via_Patreon-%23DD5D76.svg?style=flat"/></a>
|
||||
<br/>
|
||||
|
||||
<a href="https://twitter.com/ArchiveBoxApp"><img src="https://img.shields.io/badge/Tweet-%40ArchiveBoxApp-blue.svg?style=flat"/></a>
|
||||
<a href="https://github.com/ArchiveBox/ArchiveBox"><img src="https://img.shields.io/github/stars/ArchiveBox/ArchiveBox.svg?style=flat&label=Star+on+Github"/></a>
|
||||
|
||||
<br/><br/>
|
||||
|
||||
</div>
|
||||
|
||||
Platform: UNKNOWN
|
||||
Classifier: License :: OSI Approved :: MIT License
|
||||
Classifier: Natural Language :: English
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Development Status :: 4 - Beta
|
||||
Classifier: Topic :: Utilities
|
||||
Classifier: Topic :: System :: Archiving
|
||||
Classifier: Topic :: System :: Archiving :: Backup
|
||||
Classifier: Topic :: System :: Recovery Tools
|
||||
Classifier: Topic :: Sociology :: History
|
||||
Classifier: Topic :: Internet :: WWW/HTTP
|
||||
Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
|
||||
Classifier: Topic :: Internet :: WWW/HTTP :: WSGI :: Application
|
||||
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: Intended Audience :: Education
|
||||
Classifier: Intended Audience :: End Users/Desktop
|
||||
Classifier: Intended Audience :: Information Technology
|
||||
Classifier: Intended Audience :: Legal Industry
|
||||
Classifier: Intended Audience :: System Administrators
|
||||
Classifier: Environment :: Console
|
||||
Classifier: Environment :: Web Environment
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3.7
|
||||
Classifier: Programming Language :: Python :: 3.8
|
||||
Classifier: Framework :: Django
|
||||
Classifier: Typing :: Typed
|
||||
Requires-Python: >=3.7
|
||||
Description-Content-Type: text/markdown
|
||||
Provides-Extra: dev
|
|
@ -1,128 +0,0 @@
|
|||
MANIFEST.in
|
||||
README.md
|
||||
setup.py
|
||||
archivebox/.flake8
|
||||
archivebox/LICENSE
|
||||
archivebox/README.md
|
||||
archivebox/__init__.py
|
||||
archivebox/__main__.py
|
||||
archivebox/base32_crockford.py
|
||||
archivebox/config.py
|
||||
archivebox/config_stubs.py
|
||||
archivebox/logging_util.py
|
||||
archivebox/main.py
|
||||
archivebox/manage.py
|
||||
archivebox/mypy.ini
|
||||
archivebox/package.json
|
||||
archivebox/system.py
|
||||
archivebox/util.py
|
||||
archivebox.egg-info/PKG-INFO
|
||||
archivebox.egg-info/SOURCES.txt
|
||||
archivebox.egg-info/dependency_links.txt
|
||||
archivebox.egg-info/entry_points.txt
|
||||
archivebox.egg-info/requires.txt
|
||||
archivebox.egg-info/top_level.txt
|
||||
archivebox/cli/__init__.py
|
||||
archivebox/cli/archivebox_add.py
|
||||
archivebox/cli/archivebox_config.py
|
||||
archivebox/cli/archivebox_help.py
|
||||
archivebox/cli/archivebox_init.py
|
||||
archivebox/cli/archivebox_list.py
|
||||
archivebox/cli/archivebox_manage.py
|
||||
archivebox/cli/archivebox_oneshot.py
|
||||
archivebox/cli/archivebox_remove.py
|
||||
archivebox/cli/archivebox_schedule.py
|
||||
archivebox/cli/archivebox_server.py
|
||||
archivebox/cli/archivebox_shell.py
|
||||
archivebox/cli/archivebox_status.py
|
||||
archivebox/cli/archivebox_update.py
|
||||
archivebox/cli/archivebox_version.py
|
||||
archivebox/cli/tests.py
|
||||
archivebox/core/__init__.py
|
||||
archivebox/core/admin.py
|
||||
archivebox/core/apps.py
|
||||
archivebox/core/forms.py
|
||||
archivebox/core/models.py
|
||||
archivebox/core/settings.py
|
||||
archivebox/core/tests.py
|
||||
archivebox/core/urls.py
|
||||
archivebox/core/utils.py
|
||||
archivebox/core/utils_taggit.py
|
||||
archivebox/core/views.py
|
||||
archivebox/core/welcome_message.py
|
||||
archivebox/core/wsgi.py
|
||||
archivebox/core/management/commands/archivebox.py
|
||||
archivebox/core/migrations/0001_initial.py
|
||||
archivebox/core/migrations/0002_auto_20200625_1521.py
|
||||
archivebox/core/migrations/0003_auto_20200630_1034.py
|
||||
archivebox/core/migrations/0004_auto_20200713_1552.py
|
||||
archivebox/core/migrations/0005_auto_20200728_0326.py
|
||||
archivebox/core/migrations/0006_auto_20201012_1520.py
|
||||
archivebox/core/migrations/__init__.py
|
||||
archivebox/extractors/__init__.py
|
||||
archivebox/extractors/archive_org.py
|
||||
archivebox/extractors/dom.py
|
||||
archivebox/extractors/favicon.py
|
||||
archivebox/extractors/git.py
|
||||
archivebox/extractors/headers.py
|
||||
archivebox/extractors/media.py
|
||||
archivebox/extractors/mercury.py
|
||||
archivebox/extractors/pdf.py
|
||||
archivebox/extractors/readability.py
|
||||
archivebox/extractors/screenshot.py
|
||||
archivebox/extractors/singlefile.py
|
||||
archivebox/extractors/title.py
|
||||
archivebox/extractors/wget.py
|
||||
archivebox/index/__init__.py
|
||||
archivebox/index/csv.py
|
||||
archivebox/index/html.py
|
||||
archivebox/index/json.py
|
||||
archivebox/index/schema.py
|
||||
archivebox/index/sql.py
|
||||
archivebox/parsers/__init__.py
|
||||
archivebox/parsers/generic_html.py
|
||||
archivebox/parsers/generic_json.py
|
||||
archivebox/parsers/generic_rss.py
|
||||
archivebox/parsers/generic_txt.py
|
||||
archivebox/parsers/medium_rss.py
|
||||
archivebox/parsers/netscape_html.py
|
||||
archivebox/parsers/pinboard_rss.py
|
||||
archivebox/parsers/pocket_html.py
|
||||
archivebox/parsers/shaarli_rss.py
|
||||
archivebox/parsers/wallabag_atom.py
|
||||
archivebox/themes/admin/actions_as_select.html
|
||||
archivebox/themes/admin/app_index.html
|
||||
archivebox/themes/admin/base.html
|
||||
archivebox/themes/admin/login.html
|
||||
archivebox/themes/default/add_links.html
|
||||
archivebox/themes/default/base.html
|
||||
archivebox/themes/default/main_index.html
|
||||
archivebox/themes/default/core/snapshot_list.html
|
||||
archivebox/themes/default/static/add.css
|
||||
archivebox/themes/default/static/admin.css
|
||||
archivebox/themes/default/static/archive.png
|
||||
archivebox/themes/default/static/bootstrap.min.css
|
||||
archivebox/themes/default/static/external.png
|
||||
archivebox/themes/default/static/jquery.dataTables.min.css
|
||||
archivebox/themes/default/static/jquery.dataTables.min.js
|
||||
archivebox/themes/default/static/jquery.min.js
|
||||
archivebox/themes/default/static/sort_asc.png
|
||||
archivebox/themes/default/static/sort_both.png
|
||||
archivebox/themes/default/static/sort_desc.png
|
||||
archivebox/themes/default/static/spinner.gif
|
||||
archivebox/themes/legacy/favicon.ico
|
||||
archivebox/themes/legacy/link_details.html
|
||||
archivebox/themes/legacy/main_index.html
|
||||
archivebox/themes/legacy/main_index_minimal.html
|
||||
archivebox/themes/legacy/main_index_row.html
|
||||
archivebox/themes/legacy/robots.txt
|
||||
archivebox/themes/legacy/static/archive.png
|
||||
archivebox/themes/legacy/static/bootstrap.min.css
|
||||
archivebox/themes/legacy/static/external.png
|
||||
archivebox/themes/legacy/static/jquery.dataTables.min.css
|
||||
archivebox/themes/legacy/static/jquery.dataTables.min.js
|
||||
archivebox/themes/legacy/static/jquery.min.js
|
||||
archivebox/themes/legacy/static/sort_asc.png
|
||||
archivebox/themes/legacy/static/sort_both.png
|
||||
archivebox/themes/legacy/static/sort_desc.png
|
||||
archivebox/themes/legacy/static/spinner.gif
|
|
@ -1 +0,0 @@
|
|||
|
|
@ -1,3 +0,0 @@
|
|||
[console_scripts]
|
||||
archivebox = archivebox.cli:main
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
requests==2.24.0
|
||||
atomicwrites==1.4.0
|
||||
mypy-extensions==0.4.3
|
||||
django==3.0.8
|
||||
django-extensions==3.0.3
|
||||
dateparser
|
||||
ipython
|
||||
youtube-dl
|
||||
python-crontab==2.5.1
|
||||
croniter==0.3.34
|
||||
w3lib==1.22.0
|
||||
|
||||
[dev]
|
||||
setuptools
|
||||
twine
|
||||
flake8
|
||||
ipdb
|
||||
mypy
|
||||
django-stubs
|
||||
sphinx
|
||||
sphinx-rtd-theme
|
||||
recommonmark
|
||||
pytest
|
||||
bottle
|
||||
stdeb
|
|
@ -1 +0,0 @@
|
|||
archivebox
|
|
@ -1,172 +0,0 @@
|
|||
"""
|
||||
base32-crockford
|
||||
================
|
||||
|
||||
A Python module implementing the alternate base32 encoding as described
|
||||
by Douglas Crockford at: http://www.crockford.com/wrmg/base32.html.
|
||||
|
||||
He designed the encoding to:
|
||||
|
||||
* Be human and machine readable
|
||||
* Be compact
|
||||
* Be error resistant
|
||||
* Be pronounceable
|
||||
|
||||
It uses a symbol set of 10 digits and 22 letters, excluding I, L, O and
|
||||
U. Decoding is not case sensitive, and 'i' and 'l' are converted to '1'
|
||||
and 'o' is converted to '0'. Encoding uses only upper-case characters.
|
||||
|
||||
Hyphens may be present in symbol strings to improve readability, and
|
||||
are removed when decoding.
|
||||
|
||||
A check symbol can be appended to a symbol string to detect errors
|
||||
within the string.
|
||||
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
|
||||
PY3 = sys.version_info[0] == 3
|
||||
|
||||
if not PY3:
|
||||
import string as str
|
||||
|
||||
|
||||
__all__ = ["encode", "decode", "normalize"]
|
||||
|
||||
|
||||
if PY3:
|
||||
string_types = (str,)
|
||||
else:
|
||||
string_types = (basestring,) # noqa
|
||||
|
||||
# The encoded symbol space does not include I, L, O or U
symbols = '0123456789ABCDEFGHJKMNPQRSTVWXYZ'
# These five symbols are exclusively for checksum values
check_symbols = '*~$=U'

# value -> symbol and symbol -> value lookup tables. Both cover the 32 data
# symbols (values 0-31) followed by the 5 check-only symbols (values 32-36).
encode_symbols = dict((i, ch) for (i, ch) in enumerate(symbols + check_symbols))
decode_symbols = dict((ch, i) for (i, ch) in enumerate(symbols + check_symbols))

# Translation table mapping easily-confused letters onto the digits they
# resemble: I/i/L/l -> 1 and O/o -> 0.
normalize_symbols = str.maketrans('IiLlOo', '111100')

# One or more data symbols, optionally followed by a single check symbol.
# Anchored with \Z rather than $: in Python, $ also matches just before a
# trailing newline, which would let a string such as "16J\n" pass validation.
valid_symbols = re.compile(r'^[%s]+[%s]?\Z' % (symbols,
                                               re.escape(check_symbols)))

# Radix used for the data symbols, and the modulus used for the check symbol.
base = len(symbols)
check_base = len(symbols + check_symbols)
|
||||
|
||||
|
||||
def encode(number, checksum=False, split=0):
    """Turn an integer into its base32 symbol string.

    Both ``number`` and ``split`` are coerced with ``int()``; a ValueError
    is raised if either one is negative.

    When ``checksum`` is true, a check symbol (the value modulo
    ``check_base``) is appended to the result.

    When ``split`` is non-zero, the finished string (including any check
    symbol) is cut into clusters of ``split`` characters joined by hyphens.
    A value of zero is returned as '0' plus the optional check symbol and
    is never split.

    Returns the encoded string.
    """
    number = int(number)
    if number < 0:
        raise ValueError("number '%d' is not a positive integer" % number)

    split = int(split)
    if split < 0:
        raise ValueError("split '%d' is not a positive integer" % split)

    # Derived from the full value, before the digit loop consumes it.
    check_symbol = encode_symbols[number % check_base] if checksum else ''

    if number == 0:
        return '0' + check_symbol

    # Collect digits least-significant first, then reverse for output.
    digits = []
    remaining = number
    while remaining > 0:
        remaining, digit = divmod(remaining, base)
        digits.append(encode_symbols[digit])
    symbol_string = ''.join(reversed(digits)) + check_symbol

    if not split:
        return symbol_string

    return '-'.join(
        symbol_string[pos:pos + split]
        for pos in range(0, len(symbol_string), split)
    )
|
||||
|
||||
|
||||
def decode(symbol_string, checksum=False, strict=False):
    """Decode an encoded symbol string into an integer.

    The string is first passed through ``normalize`` (with the given
    ``strict`` flag), so any TypeError/ValueError raised there propagates.

    If checksum is set to True, the last symbol of the normalized string
    is treated as a check symbol and validated against the decoded value
    modulo ``check_base``. If the validation fails, a ValueError is raised.

    If checksum is False, the whole string is treated as data. A ValueError
    is raised if it contains a check-only symbol, since those have values
    outside the data radix and would otherwise silently corrupt the result.

    If strict is set to True, a ValueError is raised if the
    normalization step requires changes to the string.

    The decoded integer is returned.
    """
    symbol_string = normalize(symbol_string, strict=strict)

    check_symbol = None
    if checksum:
        symbol_string, check_symbol = symbol_string[:-1], symbol_string[-1]

    number = 0
    for symbol in symbol_string:
        value = decode_symbols.get(symbol)
        # Only values below `base` are data digits; anything else is either
        # an unknown character or one of the check-only symbols.
        if value is None or value >= base:
            raise ValueError("string '%s' contains invalid data symbol '%s'" %
                             (symbol_string, symbol))
        number = number * base + value

    if checksum:
        # .get() so that an unknown trailing character is reported as a bad
        # check symbol (ValueError) instead of leaking a KeyError.
        check_value = decode_symbols.get(check_symbol)
        modulo = number % check_base
        if check_value != modulo:
            raise ValueError("invalid check symbol '%s' for string '%s'" %
                             (check_symbol, symbol_string))

    return number
|
||||
|
||||
|
||||
def normalize(symbol_string, strict=False):
    """Clean up an encoded symbol string so it is ready for decoding.

    The following transformations are applied, in this order:

       1. Hyphens are stripped out
       2. 'I', 'i', 'L' and 'l' become '1'; 'O' and 'o' become '0'
       3. The result is upper-cased

    Raises TypeError if ``symbol_string`` is not one of ``string_types``.

    Raises ValueError if the normalized string does not match the
    ``valid_symbols`` pattern, or (on Python 2) if the input cannot be
    encoded as ASCII.

    With ``strict`` set to True, a ValueError is also raised whenever the
    normalized string differs from the input, i.e. whenever any of the
    transformations above actually changed something.

    Returns the normalized string.
    """
    # Reject non-string input up front.
    if not isinstance(symbol_string, string_types):
        raise TypeError("string is of invalid type %s" %
                        symbol_string.__class__.__name__)

    if not PY3:
        try:
            symbol_string = symbol_string.encode('ascii')
        except UnicodeEncodeError:
            raise ValueError("string should only contain ASCII characters")

    dehyphenated = symbol_string.replace('-', '')
    norm_string = dehyphenated.translate(normalize_symbols).upper()

    if valid_symbols.match(norm_string) is None:
        raise ValueError("string '%s' contains invalid characters" % norm_string)

    if strict and norm_string != symbol_string:
        raise ValueError("string '%s' requires normalization" % symbol_string)

    return norm_string
|
|
@ -19,6 +19,8 @@ meta_cmds = ('help', 'version')
|
|||
main_cmds = ('init', 'info', 'config')
|
||||
archive_cmds = ('add', 'remove', 'update', 'list', 'status')
|
||||
|
||||
fake_db = ("oneshot",)
|
||||
|
||||
display_first = (*meta_cmds, *main_cmds, *archive_cmds)
|
||||
|
||||
# every imported command module must have these properties in order to be valid
|
||||
|
@ -59,6 +61,10 @@ def run_subcommand(subcommand: str,
|
|||
pwd: Union[Path, str, None]=None) -> None:
|
||||
"""Run a given ArchiveBox subcommand with the given list of args"""
|
||||
|
||||
if subcommand not in meta_cmds:
|
||||
from ..config import setup_django
|
||||
setup_django(in_memory_db=subcommand in fake_db, check_db=subcommand in archive_cmds)
|
||||
|
||||
module = import_module('.archivebox_{}'.format(subcommand), __package__)
|
||||
module.main(args=subcommand_args, stdin=stdin, pwd=pwd) # type: ignore
|
||||
|
||||
|
@ -134,3 +140,5 @@ __all__ = (
|
|||
'run_subcommand',
|
||||
*SUBCOMMANDS.keys(),
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -89,8 +89,8 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
|
|||
index_only=command.index_only,
|
||||
overwrite=command.overwrite,
|
||||
init=command.init,
|
||||
out_dir=pwd or OUTPUT_DIR,
|
||||
extractors=command.extract,
|
||||
out_dir=pwd or OUTPUT_DIR,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -98,7 +98,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
|
|||
parser.add_argument(
|
||||
'--filter-type',
|
||||
type=str,
|
||||
choices=('exact', 'substring', 'domain', 'regex','tag'),
|
||||
choices=('exact', 'substring', 'domain', 'regex', 'tag', 'search'),
|
||||
default='exact',
|
||||
help='Type of pattern matching to use when filtering URLs',
|
||||
)
|
||||
|
|
|
@ -36,6 +36,13 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
|
|||
' ~/Desktop/sites_list.csv\n'
|
||||
)
|
||||
)
|
||||
parser.add_argument(
|
||||
"--extract",
|
||||
type=str,
|
||||
help="Pass a list of the extractors to be used. If the method name is not correct, it will be ignored. \
|
||||
This does not take precedence over the configuration",
|
||||
default=""
|
||||
)
|
||||
parser.add_argument(
|
||||
'--out-dir',
|
||||
type=str,
|
||||
|
@ -55,6 +62,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
|
|||
oneshot(
|
||||
url=stdin_url or url,
|
||||
out_dir=Path(command.out_dir).resolve(),
|
||||
extractors=command.extract,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -91,7 +91,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
|
|||
parser.add_argument(
|
||||
'--filter-type',
|
||||
type=str,
|
||||
choices=('exact', 'substring', 'domain', 'regex'),
|
||||
choices=('exact', 'substring', 'domain', 'regex', 'tag', 'search'),
|
||||
default='exact',
|
||||
help='Type of pattern matching to use when filtering URLs',
|
||||
)
|
||||
|
@ -102,6 +102,13 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
|
|||
default=None,
|
||||
help='Update only URLs matching these filter patterns.'
|
||||
)
|
||||
parser.add_argument(
|
||||
"--extract",
|
||||
type=str,
|
||||
help="Pass a list of the extractors to be used. If the method name is not correct, it will be ignored. \
|
||||
This does not take precedence over the configuration",
|
||||
default=""
|
||||
)
|
||||
command = parser.parse_args(args or ())
|
||||
filter_patterns_str = accept_stdin(stdin)
|
||||
|
||||
|
@ -117,6 +124,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
|
|||
after=command.after,
|
||||
before=command.before,
|
||||
out_dir=pwd or OUTPUT_DIR,
|
||||
extractors=command.extract,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -1,3 +1,24 @@
|
|||
"""
|
||||
ArchiveBox config definitions (including defaults and dynamic config options).
|
||||
|
||||
Config Usage Example:
|
||||
|
||||
archivebox config --set MEDIA_TIMEOUT=600
|
||||
env MEDIA_TIMEOUT=600 USE_COLOR=False ... archivebox [subcommand] ...
|
||||
|
||||
Config Precedence Order:
|
||||
|
||||
1. cli args (--update-all / --index-only / etc.)
|
||||
2. shell environment vars (env USE_COLOR=False archivebox add '...')
|
||||
3. config file (echo "SAVE_FAVICON=False" >> ArchiveBox.conf)
|
||||
4. defaults (defined below in Python)
|
||||
|
||||
Documentation:
|
||||
|
||||
https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration
|
||||
|
||||
"""
|
||||
|
||||
__package__ = 'archivebox'
|
||||
|
||||
import os
|
||||
|
@ -24,26 +45,9 @@ from .config_stubs import (
|
|||
ConfigDefaultDict,
|
||||
)
|
||||
|
||||
# precedence order for config:
|
||||
# 1. cli args (e.g. )
|
||||
# 2. shell environment vars (env USE_COLOR=False archivebox add '...')
|
||||
# 3. config file (echo "SAVE_FAVICON=False" >> ArchiveBox.conf)
|
||||
# 4. defaults (defined below in Python)
|
||||
############################### Config Schema ##################################
|
||||
|
||||
#
|
||||
# env SHOW_PROGRESS=1 archivebox add '...'
|
||||
# archivebox config --set TIMEOUT=600
|
||||
#
|
||||
|
||||
# ******************************************************************************
|
||||
# Documentation: https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration
|
||||
# Use the 'env' command to pass config options to ArchiveBox. e.g.:
|
||||
# env USE_COLOR=True CHROME_BINARY=chromium archivebox add < example.html
|
||||
# ******************************************************************************
|
||||
|
||||
################################# User Config ##################################
|
||||
|
||||
CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
|
||||
CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
|
||||
'SHELL_CONFIG': {
|
||||
'IS_TTY': {'type': bool, 'default': lambda _: sys.stdout.isatty()},
|
||||
'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']},
|
||||
|
@ -139,6 +143,18 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
|
|||
'GIT_ARGS': {'type': list, 'default': ['--recursive']},
|
||||
},
|
||||
|
||||
'SEARCH_BACKEND_CONFIG' : {
|
||||
'USE_INDEXING_BACKEND': {'type': bool, 'default': True},
|
||||
'USE_SEARCHING_BACKEND': {'type': bool, 'default': True},
|
||||
'SEARCH_BACKEND_ENGINE': {'type': str, 'default': 'ripgrep'},
|
||||
'SEARCH_BACKEND_HOST_NAME': {'type': str, 'default': 'localhost'},
|
||||
'SEARCH_BACKEND_PORT': {'type': int, 'default': 1491},
|
||||
'SEARCH_BACKEND_PASSWORD': {'type': str, 'default': 'SecretPassword'},
|
||||
# SONIC
|
||||
'SONIC_COLLECTION': {'type': str, 'default': 'archivebox'},
|
||||
'SONIC_BUCKET': {'type': str, 'default': 'snapshots'},
|
||||
},
|
||||
|
||||
'DEPENDENCY_CONFIG': {
|
||||
'USE_CURL': {'type': bool, 'default': True},
|
||||
'USE_WGET': {'type': bool, 'default': True},
|
||||
|
@ -149,7 +165,8 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
|
|||
'USE_CHROME': {'type': bool, 'default': True},
|
||||
'USE_NODE': {'type': bool, 'default': True},
|
||||
'USE_YOUTUBEDL': {'type': bool, 'default': True},
|
||||
|
||||
'USE_RIPGREP': {'type': bool, 'default': True},
|
||||
|
||||
'CURL_BINARY': {'type': str, 'default': 'curl'},
|
||||
'GIT_BINARY': {'type': str, 'default': 'git'},
|
||||
'WGET_BINARY': {'type': str, 'default': 'wget'},
|
||||
|
@ -158,25 +175,48 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
|
|||
'MERCURY_BINARY': {'type': str, 'default': 'mercury-parser'},
|
||||
'YOUTUBEDL_BINARY': {'type': str, 'default': 'youtube-dl'},
|
||||
'NODE_BINARY': {'type': str, 'default': 'node'},
|
||||
'RIPGREP_BINARY': {'type': str, 'default': 'rg'},
|
||||
'CHROME_BINARY': {'type': str, 'default': None},
|
||||
|
||||
'POCKET_CONSUMER_KEY': {'type': str, 'default': None},
|
||||
'POCKET_ACCESS_TOKENS': {'type': dict, 'default': {}},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
########################## Backwards-Compatibility #############################
|
||||
|
||||
|
||||
# for backwards compatibility with old config files, check old/deprecated names for each key
|
||||
CONFIG_ALIASES = {
|
||||
alias: key
|
||||
for section in CONFIG_DEFAULTS.values()
|
||||
for section in CONFIG_SCHEMA.values()
|
||||
for key, default in section.items()
|
||||
for alias in default.get('aliases', ())
|
||||
}
|
||||
USER_CONFIG = {key for section in CONFIG_DEFAULTS.values() for key in section.keys()}
|
||||
USER_CONFIG = {key for section in CONFIG_SCHEMA.values() for key in section.keys()}
|
||||
|
||||
def get_real_name(key: str) -> str:
|
||||
"""get the current canonical name for a given deprecated config key"""
|
||||
return CONFIG_ALIASES.get(key.upper().strip(), key.upper().strip())
|
||||
|
||||
############################## Derived Config ##############################
|
||||
|
||||
# Constants
|
||||
|
||||
################################ Constants #####################################
|
||||
|
||||
PACKAGE_DIR_NAME = 'archivebox'
|
||||
TEMPLATES_DIR_NAME = 'themes'
|
||||
|
||||
ARCHIVE_DIR_NAME = 'archive'
|
||||
SOURCES_DIR_NAME = 'sources'
|
||||
LOGS_DIR_NAME = 'logs'
|
||||
STATIC_DIR_NAME = 'static'
|
||||
SQL_INDEX_FILENAME = 'index.sqlite3'
|
||||
JSON_INDEX_FILENAME = 'index.json'
|
||||
HTML_INDEX_FILENAME = 'index.html'
|
||||
ROBOTS_TXT_FILENAME = 'robots.txt'
|
||||
FAVICON_FILENAME = 'favicon.ico'
|
||||
CONFIG_FILENAME = 'ArchiveBox.conf'
|
||||
|
||||
DEFAULT_CLI_COLORS = {
|
||||
'reset': '\033[00;00m',
|
||||
|
@ -225,42 +265,18 @@ STATICFILE_EXTENSIONS = {
|
|||
# html, htm, shtml, xhtml, xml, aspx, php, cgi
|
||||
}
|
||||
|
||||
PACKAGE_DIR_NAME = 'archivebox'
|
||||
TEMPLATES_DIR_NAME = 'themes'
|
||||
|
||||
ARCHIVE_DIR_NAME = 'archive'
|
||||
SOURCES_DIR_NAME = 'sources'
|
||||
LOGS_DIR_NAME = 'logs'
|
||||
STATIC_DIR_NAME = 'static'
|
||||
SQL_INDEX_FILENAME = 'index.sqlite3'
|
||||
JSON_INDEX_FILENAME = 'index.json'
|
||||
HTML_INDEX_FILENAME = 'index.html'
|
||||
ROBOTS_TXT_FILENAME = 'robots.txt'
|
||||
FAVICON_FILENAME = 'favicon.ico'
|
||||
CONFIG_FILENAME = 'ArchiveBox.conf'
|
||||
|
||||
CONFIG_HEADER = (
|
||||
"""# This is the config file for your ArchiveBox collection.
|
||||
#
|
||||
# You can add options here manually in INI format, or automatically by running:
|
||||
# archivebox config --set KEY=VALUE
|
||||
#
|
||||
# If you modify this file manually, make sure to update your archive after by running:
|
||||
# archivebox init
|
||||
#
|
||||
# A list of all possible config with documentation and examples can be found here:
|
||||
# https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration
|
||||
|
||||
""")
|
||||
|
||||
|
||||
DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
|
||||
############################## Derived Config ##################################
|
||||
|
||||
|
||||
DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
|
||||
'TERM_WIDTH': {'default': lambda c: lambda: shutil.get_terminal_size((100, 10)).columns},
|
||||
'USER': {'default': lambda c: getpass.getuser() or os.getlogin()},
|
||||
'ANSI': {'default': lambda c: DEFAULT_CLI_COLORS if c['USE_COLOR'] else {k: '' for k in DEFAULT_CLI_COLORS.keys()}},
|
||||
|
||||
'PACKAGE_DIR': {'default': lambda c: Path(__file__).resolve().parent},
|
||||
'TEMPLATES_DIR': {'default': lambda c: c['PACKAGE_DIR'] / TEMPLATES_DIR_NAME / 'legacy'},
|
||||
'TEMPLATES_DIR': {'default': lambda c: c['PACKAGE_DIR'] / TEMPLATES_DIR_NAME},
|
||||
|
||||
'OUTPUT_DIR': {'default': lambda c: Path(c['OUTPUT_DIR']).resolve() if c['OUTPUT_DIR'] else Path(os.curdir).resolve()},
|
||||
'ARCHIVE_DIR': {'default': lambda c: c['OUTPUT_DIR'] / ARCHIVE_DIR_NAME},
|
||||
|
@ -297,6 +313,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
|
|||
'SAVE_WARC': {'default': lambda c: c['USE_WGET'] and c['SAVE_WARC']},
|
||||
'WGET_ARGS': {'default': lambda c: c['WGET_ARGS'] or []},
|
||||
|
||||
'RIPGREP_VERSION': {'default': lambda c: bin_version(c['RIPGREP_BINARY']) if c['USE_RIPGREP'] else None},
|
||||
|
||||
'USE_SINGLEFILE': {'default': lambda c: c['USE_SINGLEFILE'] and c['SAVE_SINGLEFILE']},
|
||||
'SINGLEFILE_VERSION': {'default': lambda c: bin_version(c['SINGLEFILE_BINARY']) if c['USE_SINGLEFILE'] else None},
|
||||
|
@ -305,7 +322,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
|
|||
'READABILITY_VERSION': {'default': lambda c: bin_version(c['READABILITY_BINARY']) if c['USE_READABILITY'] else None},
|
||||
|
||||
'USE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['SAVE_MERCURY']},
|
||||
'MERCURY_VERSION': {'default': lambda c: '1.0.0' if (c['USE_MERCURY'] and c['MERCURY_BINARY']) else None}, # mercury is unversioned
|
||||
'MERCURY_VERSION': {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None}, # mercury is unversioned
|
||||
|
||||
'USE_GIT': {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']},
|
||||
'GIT_VERSION': {'default': lambda c: bin_version(c['GIT_BINARY']) if c['USE_GIT'] else None},
|
||||
|
@ -319,8 +336,6 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
|
|||
'USE_CHROME': {'default': lambda c: c['USE_CHROME'] and (c['SAVE_PDF'] or c['SAVE_SCREENSHOT'] or c['SAVE_DOM'] or c['SAVE_SINGLEFILE'])},
|
||||
'CHROME_BINARY': {'default': lambda c: c['CHROME_BINARY'] if c['CHROME_BINARY'] else find_chrome_binary()},
|
||||
'CHROME_VERSION': {'default': lambda c: bin_version(c['CHROME_BINARY']) if c['USE_CHROME'] else None},
|
||||
'USE_NODE': {'default': lambda c: c['USE_NODE'] and (c['SAVE_READABILITY'] or c['SAVE_SINGLEFILE'])},
|
||||
'NODE_VERSION': {'default': lambda c: bin_version(c['NODE_BINARY']) if c['USE_NODE'] else None},
|
||||
|
||||
'SAVE_PDF': {'default': lambda c: c['USE_CHROME'] and c['SAVE_PDF']},
|
||||
'SAVE_SCREENSHOT': {'default': lambda c: c['USE_CHROME'] and c['SAVE_SCREENSHOT']},
|
||||
|
@ -328,6 +343,9 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
|
|||
'SAVE_SINGLEFILE': {'default': lambda c: c['USE_CHROME'] and c['SAVE_SINGLEFILE'] and c['USE_NODE']},
|
||||
'SAVE_READABILITY': {'default': lambda c: c['USE_READABILITY'] and c['USE_NODE']},
|
||||
'SAVE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['USE_NODE']},
|
||||
|
||||
'USE_NODE': {'default': lambda c: c['USE_NODE'] and (c['SAVE_READABILITY'] or c['SAVE_SINGLEFILE'] or c['SAVE_MERCURY'])},
|
||||
'NODE_VERSION': {'default': lambda c: bin_version(c['NODE_BINARY']) if c['USE_NODE'] else None},
|
||||
|
||||
'DEPENDENCIES': {'default': lambda c: get_dependency_info(c)},
|
||||
'CODE_LOCATIONS': {'default': lambda c: get_code_locations(c)},
|
||||
|
@ -340,6 +358,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
|
|||
|
||||
################################### Helpers ####################################
|
||||
|
||||
|
||||
def load_config_val(key: str,
|
||||
default: ConfigDefaultValue=None,
|
||||
type: Optional[Type]=None,
|
||||
|
@ -386,7 +405,7 @@ def load_config_val(key: str,
|
|||
raise ValueError(f'Invalid configuration option {key}={val} (expected an integer)')
|
||||
return int(val)
|
||||
|
||||
elif type is list:
|
||||
elif type is list or type is dict:
|
||||
return json.loads(val)
|
||||
|
||||
raise Exception('Config values can only be str, bool, int or json')
|
||||
|
@ -418,6 +437,20 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
|
|||
|
||||
from .system import atomic_write
|
||||
|
||||
CONFIG_HEADER = (
|
||||
"""# This is the config file for your ArchiveBox collection.
|
||||
#
|
||||
# You can add options here manually in INI format, or automatically by running:
|
||||
# archivebox config --set KEY=VALUE
|
||||
#
|
||||
# If you modify this file manually, make sure to update your archive after by running:
|
||||
# archivebox init
|
||||
#
|
||||
# A list of all possible config with documentation and examples can be found here:
|
||||
# https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration
|
||||
|
||||
""")
|
||||
|
||||
out_dir = out_dir or Path(os.getenv('OUTPUT_DIR', '.')).resolve()
|
||||
config_path = Path(out_dir) / CONFIG_FILENAME
|
||||
|
||||
|
@ -431,7 +464,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
|
|||
with open(config_path, 'r') as old:
|
||||
atomic_write(f'{config_path}.bak', old.read())
|
||||
|
||||
find_section = lambda key: [name for name, opts in CONFIG_DEFAULTS.items() if key in opts][0]
|
||||
find_section = lambda key: [name for name, opts in CONFIG_SCHEMA.items() if key in opts][0]
|
||||
|
||||
# Set up sections in empty config file
|
||||
for key, val in config.items():
|
||||
|
@ -520,6 +553,8 @@ def load_config(defaults: ConfigDefaultDict,
|
|||
|
||||
# with open(os.path.join(config['OUTPUT_DIR'], CONFIG_FILENAME), 'w+') as f:
|
||||
|
||||
|
||||
# Logging Helpers
|
||||
def stdout(*args, color: Optional[str]=None, prefix: str='', config: Optional[ConfigDict]=None) -> None:
|
||||
ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
|
||||
|
||||
|
@ -551,6 +586,7 @@ def hint(text: Union[Tuple[str, ...], List[str], str], prefix=' ', config: Op
|
|||
stderr('{} {}'.format(prefix, line))
|
||||
|
||||
|
||||
# Dependency Metadata Helpers
|
||||
def bin_version(binary: Optional[str]) -> Optional[str]:
|
||||
"""check the presence and return valid version line of a specified binary"""
|
||||
|
||||
|
@ -580,7 +616,7 @@ def bin_path(binary: Optional[str]) -> Optional[str]:
|
|||
if node_modules_bin.exists():
|
||||
return str(node_modules_bin.resolve())
|
||||
|
||||
return shutil.which(Path(binary).expanduser()) or binary
|
||||
return shutil.which(str(Path(binary).expanduser())) or shutil.which(str(binary)) or binary
|
||||
|
||||
def bin_hash(binary: Optional[str]) -> Optional[str]:
|
||||
if binary is None:
|
||||
|
@ -667,7 +703,7 @@ def get_code_locations(config: ConfigDict) -> SimpleConfigValueDict:
|
|||
'TEMPLATES_DIR': {
|
||||
'path': (config['TEMPLATES_DIR']).resolve(),
|
||||
'enabled': True,
|
||||
'is_valid': (config['TEMPLATES_DIR'] / 'static').exists(),
|
||||
'is_valid': (config['TEMPLATES_DIR'] / config['ACTIVE_THEME'] / 'static').exists(),
|
||||
},
|
||||
# 'NODE_MODULES_DIR': {
|
||||
# 'path': ,
|
||||
|
@ -811,6 +847,21 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
|
|||
'enabled': config['USE_CHROME'],
|
||||
'is_valid': bool(config['CHROME_VERSION']),
|
||||
},
|
||||
'RIPGREP_BINARY': {
|
||||
'path': bin_path(config['RIPGREP_BINARY']),
|
||||
'version': config['RIPGREP_VERSION'],
|
||||
'hash': bin_hash(config['RIPGREP_BINARY']),
|
||||
'enabled': config['USE_RIPGREP'],
|
||||
'is_valid': bool(config['RIPGREP_VERSION']),
|
||||
},
|
||||
# TODO: add an entry for the sonic search backend?
|
||||
# 'SONIC_BINARY': {
|
||||
# 'path': bin_path(config['SONIC_BINARY']),
|
||||
# 'version': config['SONIC_VERSION'],
|
||||
# 'hash': bin_hash(config['SONIC_BINARY']),
|
||||
# 'enabled': config['USE_SONIC'],
|
||||
# 'is_valid': bool(config['SONIC_VERSION']),
|
||||
# },
|
||||
}
|
||||
|
||||
def get_chrome_info(config: ConfigDict) -> ConfigValue:
|
||||
|
@ -826,28 +877,51 @@ def get_chrome_info(config: ConfigDict) -> ConfigValue:
|
|||
}
|
||||
|
||||
|
||||
################################## Load Config #################################
|
||||
# ******************************************************************************
|
||||
# ******************************************************************************
|
||||
# ******************************** Load Config *********************************
|
||||
# ******* (compile the defaults, configs, and metadata all into CONFIG) ********
|
||||
# ******************************************************************************
|
||||
# ******************************************************************************
|
||||
|
||||
|
||||
def load_all_config():
|
||||
CONFIG: ConfigDict = {}
|
||||
for section_name, section_config in CONFIG_DEFAULTS.items():
|
||||
for section_name, section_config in CONFIG_SCHEMA.items():
|
||||
CONFIG = load_config(section_config, CONFIG)
|
||||
|
||||
return load_config(DERIVED_CONFIG_DEFAULTS, CONFIG)
|
||||
return load_config(DYNAMIC_CONFIG_SCHEMA, CONFIG)
|
||||
|
||||
# add all final config values in CONFIG to globals in this file
|
||||
CONFIG = load_all_config()
|
||||
globals().update(CONFIG)
|
||||
# this lets us do: from .config import DEBUG, MEDIA_TIMEOUT, ...
|
||||
|
||||
# Timezone set as UTC
|
||||
|
||||
# ******************************************************************************
|
||||
# ******************************************************************************
|
||||
# ******************************************************************************
|
||||
# ******************************************************************************
|
||||
# ******************************************************************************
|
||||
|
||||
|
||||
|
||||
########################### System Environment Setup ###########################
|
||||
|
||||
|
||||
# Set timezone to UTC and umask to OUTPUT_PERMISSIONS
|
||||
os.environ["TZ"] = 'UTC'
|
||||
os.umask(0o777 - int(OUTPUT_PERMISSIONS, base=8)) # noqa: F821
|
||||
|
||||
# add ./node_modules/.bin to $PATH so we can use node scripts in extractors
|
||||
NODE_BIN_PATH = str((Path(CONFIG["OUTPUT_DIR"]).absolute() / 'node_modules' / '.bin'))
|
||||
sys.path.append(NODE_BIN_PATH)
|
||||
|
||||
|
||||
############################## Importable Checkers #############################
|
||||
|
||||
|
||||
########################### Config Validity Checkers ###########################
|
||||
|
||||
|
||||
def check_system_config(config: ConfigDict=CONFIG) -> None:
|
||||
### Check system environment
|
||||
|
@ -936,7 +1010,7 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
|
|||
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#save_media')
|
||||
stderr()
|
||||
|
||||
def check_data_folder(out_dir: Optional[str]=None, config: ConfigDict=CONFIG) -> None:
|
||||
def check_data_folder(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG) -> None:
|
||||
output_dir = out_dir or config['OUTPUT_DIR']
|
||||
assert isinstance(output_dir, (str, Path))
|
||||
|
||||
|
@ -976,7 +1050,7 @@ def check_data_folder(out_dir: Optional[str]=None, config: ConfigDict=CONFIG) ->
|
|||
|
||||
|
||||
|
||||
def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG) -> None:
|
||||
def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None:
|
||||
check_system_config()
|
||||
|
||||
output_dir = out_dir or Path(config['OUTPUT_DIR'])
|
||||
|
@ -989,7 +1063,15 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG)
|
|||
os.environ.setdefault('OUTPUT_DIR', str(output_dir))
|
||||
assert (config['PACKAGE_DIR'] / 'core' / 'settings.py').exists(), 'settings.py was not found at archivebox/core/settings.py'
|
||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
|
||||
django.setup()
|
||||
|
||||
if in_memory_db:
|
||||
# Put the db in memory and run migrations in case any command requires it
|
||||
from django.core.management import call_command
|
||||
os.environ.setdefault("ARCHIVEBOX_DATABASE_NAME", ":memory:")
|
||||
django.setup()
|
||||
call_command("migrate", interactive=False, verbosity=0)
|
||||
else:
|
||||
django.setup()
|
||||
|
||||
if check_db:
|
||||
sql_index_path = Path(output_dir) / SQL_INDEX_FILENAME
|
||||
|
@ -997,5 +1079,3 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG)
|
|||
f'No database file {SQL_INDEX_FILENAME} found in OUTPUT_DIR: {config["OUTPUT_DIR"]}')
|
||||
except KeyboardInterrupt:
|
||||
raise SystemExit(2)
|
||||
|
||||
os.umask(0o777 - int(OUTPUT_PERMISSIONS, base=8)) # noqa: F821
|
||||
|
|
|
@ -13,8 +13,10 @@ from django import forms
|
|||
|
||||
from core.models import Snapshot, Tag
|
||||
from core.forms import AddLinkForm, TagField
|
||||
from core.utils import get_icons
|
||||
|
||||
from core.mixins import SearchResultsAdminMixin
|
||||
|
||||
from index.html import snapshot_icons
|
||||
from util import htmldecode, urldecode, ansi_to_html
|
||||
from logging_util import printable_filesize
|
||||
from main import add, remove
|
||||
|
@ -82,7 +84,7 @@ class SnapshotAdminForm(forms.ModelForm):
|
|||
return instance
|
||||
|
||||
|
||||
class SnapshotAdmin(admin.ModelAdmin):
|
||||
class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
|
||||
list_display = ('added', 'title_str', 'url_str', 'files', 'size')
|
||||
sort_fields = ('title_str', 'url_str', 'added')
|
||||
readonly_fields = ('id', 'url', 'timestamp', 'num_outputs', 'is_archived', 'url_hash', 'added', 'updated')
|
||||
|
@ -94,6 +96,13 @@ class SnapshotAdmin(admin.ModelAdmin):
|
|||
actions_template = 'admin/actions_as_select.html'
|
||||
form = SnapshotAdminForm
|
||||
|
||||
def get_urls(self):
    """Return this admin's URL patterns with the snapshot grid route first.

    The 'grid/' route is wrapped in ``admin_site.admin_view`` and placed ahead
    of the patterns returned by the parent class.
    """
    inherited_routes = super().get_urls()
    grid_route = path('grid/', self.admin_site.admin_view(self.grid_view), name='grid')
    return [grid_route] + inherited_routes
|
||||
|
||||
def get_queryset(self, request):
|
||||
return super().get_queryset(request).prefetch_related('tags')
|
||||
|
||||
|
@ -128,7 +137,7 @@ class SnapshotAdmin(admin.ModelAdmin):
|
|||
) + mark_safe(f' <span class="tags">{tags}</span>')
|
||||
|
||||
def files(self, obj):
|
||||
return get_icons(obj)
|
||||
return snapshot_icons(obj)
|
||||
|
||||
def size(self, obj):
|
||||
archive_size = obj.archive_size
|
||||
|
@ -151,6 +160,31 @@ class SnapshotAdmin(admin.ModelAdmin):
|
|||
obj.url.split('://www.', 1)[-1].split('://', 1)[-1][:64],
|
||||
)
|
||||
|
||||
def grid_view(self, request):
    """Render the snapshot changelist using the grid template.

    The stock ``changelist_view`` is reused by temporarily overriding the
    template and pagination attributes on this admin instance. The previous
    values are always restored before this method exits, including when
    ``changelist_view`` raises, so a failed grid request cannot leave the
    regular list view stuck on the grid template/page size.

    Returns whatever ``changelist_view(request)`` returns.
    """
    # Save before monkey patching to restore for changelist list view
    saved_change_list_template = self.change_list_template
    saved_list_per_page = self.list_per_page
    saved_list_max_show_all = self.list_max_show_all

    # Monkey patch here plus core_tags.py
    self.change_list_template = 'admin/grid_change_list.html'
    self.list_per_page = 20
    self.list_max_show_all = self.list_per_page

    # NOTE(review): the attributes are patched on the admin instance itself;
    # if that instance is shared between concurrent requests, another request
    # could observe the grid settings while this one renders - confirm.
    try:
        # Call monkey patched view
        return self.changelist_view(request)
    finally:
        # Restore values even if rendering failed
        self.change_list_template = saved_change_list_template
        self.list_per_page = saved_list_per_page
        self.list_max_show_all = saved_list_max_show_all
|
||||
|
||||
|
||||
id_str.short_description = 'ID'
|
||||
title_str.short_description = 'Title'
|
||||
url_str.short_description = 'Original URL'
|
||||
|
@ -216,7 +250,6 @@ class ArchiveBoxAdmin(admin.AdminSite):
|
|||
|
||||
return render(template_name='add_links.html', request=request, context=context)
|
||||
|
||||
|
||||
admin.site = ArchiveBoxAdmin()
|
||||
admin.site.register(get_user_model())
|
||||
admin.site.register(Snapshot, SnapshotAdmin)
|
||||
|
|
|
@ -3,18 +3,29 @@ __package__ = 'archivebox.core'
|
|||
from django import forms
|
||||
|
||||
from ..util import URL_REGEX
|
||||
from .utils_taggit import edit_string_for_tags, parse_tags
|
||||
from ..vendor.taggit_utils import edit_string_for_tags, parse_tags
|
||||
|
||||
CHOICES = (
|
||||
('0', 'depth = 0 (archive just these URLs)'),
|
||||
('1', 'depth = 1 (archive these URLs and all URLs one hop away)'),
|
||||
)
|
||||
|
||||
from ..extractors import get_default_archive_methods
|
||||
|
||||
ARCHIVE_METHODS = [
|
||||
(name, name)
|
||||
for name, _, _ in get_default_archive_methods()
|
||||
]
|
||||
|
||||
|
||||
class AddLinkForm(forms.Form):
|
||||
url = forms.RegexField(label="URLs (one per line)", regex=URL_REGEX, min_length='6', strip=True, widget=forms.Textarea, required=True)
|
||||
depth = forms.ChoiceField(label="Archive depth", choices=CHOICES, widget=forms.RadioSelect, initial='0')
|
||||
|
||||
|
||||
archive_methods = forms.MultipleChoiceField(
|
||||
required=False,
|
||||
widget=forms.SelectMultiple,
|
||||
choices=ARCHIVE_METHODS,
|
||||
)
|
||||
class TagWidgetMixin:
|
||||
def format_value(self, value):
|
||||
if value is not None and not isinstance(value, str):
|
||||
|
|
97
archivebox/core/migrations/0007_archiveresult.py
Normal file
|
@ -0,0 +1,97 @@
|
|||
# Generated by Django 3.0.8 on 2020-11-04 12:25
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
|
||||
from config import CONFIG
|
||||
from index.json import to_json
|
||||
|
||||
try:
|
||||
JSONField = models.JSONField
|
||||
except AttributeError:
|
||||
import jsonfield
|
||||
JSONField = jsonfield.JSONField
|
||||
|
||||
|
||||
def forwards_func(apps, schema_editor):
    """Backfill ArchiveResult rows from each snapshot's on-disk index.json.

    For every Snapshot, the ``index.json`` inside its archive directory
    (``CONFIG['ARCHIVE_DIR'] / snapshot.timestamp``) is read and one
    ArchiveResult row is created per entry found under its ``"history"`` key.

    Snapshots are skipped (best effort) when the index file cannot be opened
    or parsed, or when it does not contain a ``"history"`` mapping, so a
    single incomplete index cannot abort the whole migration.

    Args:
        apps: historical app registry provided by the migration framework.
        schema_editor: unused; part of the RunPython callback signature.
    """
    Snapshot = apps.get_model("core", "Snapshot")
    ArchiveResult = apps.get_model("core", "ArchiveResult")

    archive_dir = Path(CONFIG['ARCHIVE_DIR'])

    for snapshot in Snapshot.objects.all():
        out_dir = archive_dir / snapshot.timestamp

        try:
            with open(out_dir / "index.json", "r") as f:
                fs_index = json.load(f)
        except Exception:
            # Deliberate best effort: no readable index means nothing to import.
            continue

        # An index without a usable "history" mapping has no results to import.
        history = fs_index.get("history") if isinstance(fs_index, dict) else None
        if not isinstance(history, dict):
            continue

        for extractor, results in history.items():
            for result in results:
                ArchiveResult.objects.create(
                    extractor=extractor,
                    snapshot=snapshot,
                    cmd=result["cmd"],
                    cmd_version=result["cmd_version"],
                    start_ts=result["start_ts"],
                    end_ts=result["end_ts"],
                    status=result["status"],
                    pwd=result["pwd"],
                    output=result["output"],
                )
|
||||
|
||||
|
||||
def verify_json_index_integrity(snapshot):
    """Copy ArchiveResult rows that are absent from index.json back into it.

    Reads the snapshot's ``index.json`` (under
    ``CONFIG['ARCHIVE_DIR'] / snapshot.timestamp``), finds every related
    ArchiveResult whose ``start_ts.isoformat()`` does not match the
    ``start_ts`` of any entry in the JSON ``"history"``, appends those results
    to the history list of their extractor, and rewrites the file.

    An extractor (or the ``"history"`` mapping itself) that is not yet present
    in the JSON index is created on demand instead of raising ``KeyError``.

    Raises:
        OSError / ValueError: if the index file cannot be read or parsed
        (not handled here, as in the original implementation).
    """
    results = snapshot.archiveresult_set.all()
    index_path = Path(CONFIG['ARCHIVE_DIR']) / snapshot.timestamp / "index.json"
    with open(index_path, "r") as f:
        index = json.load(f)

    history = index.setdefault("history", {})

    # Set membership keeps the comparison linear in the number of results.
    known_start_ts = {
        result["start_ts"]
        for extractor_results in history.values()
        for result in extractor_results
    }

    missing_results = [
        result for result in results
        if result.start_ts.isoformat() not in known_start_ts
    ]

    for missing in missing_results:
        history.setdefault(missing.extractor, []).append({
            "cmd": missing.cmd,
            "cmd_version": missing.cmd_version,
            "end_ts": missing.end_ts.isoformat(),
            "start_ts": missing.start_ts.isoformat(),
            "pwd": missing.pwd,
            "output": missing.output,
            "schema": "ArchiveResult",
            "status": missing.status,
        })

    json_index = to_json(index)
    with open(index_path, "w") as f:
        f.write(json_index)
|
||||
|
||||
|
||||
def reverse_func(apps, schema_editor):
    """Undo the backfill: sync results into the JSON indexes, then drop the rows.

    Every snapshot is first passed through ``verify_json_index_integrity`` and
    only afterwards are all ArchiveResult rows deleted.
    """
    snapshot_model = apps.get_model("core", "Snapshot")
    archive_result_model = apps.get_model("core", "ArchiveResult")

    all_snapshots = snapshot_model.objects.all()
    for current in all_snapshots:
        verify_json_index_integrity(current)

    archive_result_model.objects.all().delete()
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Create the ArchiveResult table and populate it via forwards_func."""

    # Runs after the previous migration of the core app.
    dependencies = [
        ('core', '0006_auto_20201012_1520'),
    ]

    operations = [
        # Schema step: one row per extractor result, linked to its Snapshot.
        migrations.CreateModel(
            name='ArchiveResult',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('cmd', JSONField()),
                ('pwd', models.CharField(max_length=256)),
                ('cmd_version', models.CharField(max_length=32)),
                ('status', models.CharField(choices=[('succeeded', 'succeeded'), ('failed', 'failed'), ('skipped', 'skipped')], max_length=16)),
                ('output', models.CharField(max_length=512)),
                ('start_ts', models.DateTimeField()),
                ('end_ts', models.DateTimeField()),
                ('extractor', models.CharField(choices=[('title', 'title'), ('favicon', 'favicon'), ('wget', 'wget'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('readability', 'readability'), ('mercury', 'mercury'), ('git', 'git'), ('media', 'media'), ('headers', 'headers'), ('archive_org', 'archive_org')], max_length=32)),
                ('snapshot', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='core.Snapshot')),
            ],
        ),
        # Data step: forwards_func runs on migrate, reverse_func on rollback.
        migrations.RunPython(forwards_func, reverse_func),
    ]
|
23
archivebox/core/mixins.py
Normal file
|
@ -0,0 +1,23 @@
|
|||
from django.contrib import messages
|
||||
|
||||
from archivebox.search import query_search_index
|
||||
|
||||
class SearchResultsAdminMixin(object):
    """Admin mixin that consults the search backend for changelist searches."""

    def get_search_results(self, request, queryset, search_term):
        """Enhance the search queryset with results from the search backend.

        The default admin search is run first. For a non-blank ``search_term``
        the search backend is then queried via ``query_search_index``:

        * on success, the result is the incoming ``queryset`` intersected
          with the backend's matches;
        * if the backend raises an ``Exception``, a warning message is
          attached to the request and the default admin search results are
          returned unchanged.

        Exceptions other than those raised by the backend query (including
        ``KeyboardInterrupt``/``SystemExit``) propagate to the caller; they
        are no longer swallowed by a ``return`` inside ``finally``.

        Returns:
            tuple: ``(queryset, use_distinct)`` as expected by the admin.
        """
        qs, use_distinct = \
            super(SearchResultsAdminMixin, self).get_search_results(
                request, queryset, search_term)

        search_term = search_term.strip()
        if not search_term:
            return qs, use_distinct

        try:
            qsearch = query_search_index(search_term)
        except Exception as err:
            messages.add_message(request, messages.WARNING, f'Error from the search backend, only showing results from default admin search fields - Error: {err}')
        else:
            # NOTE(review): this intersects the *unfiltered* queryset with the
            # backend hits, discarding the default admin field matches in
            # `qs` - confirm whether combining with `qs` was intended.
            qs = queryset & qsearch

        return qs, use_distinct
|
|
@ -5,9 +5,24 @@ import uuid
|
|||
from django.db import models, transaction
|
||||
from django.utils.functional import cached_property
|
||||
from django.utils.text import slugify
|
||||
from django.db.models import Case, When, Value, IntegerField
|
||||
|
||||
from ..util import parse_date
|
||||
from ..index.schema import Link
|
||||
from ..extractors import get_default_archive_methods, ARCHIVE_METHODS_INDEXING_PRECEDENCE
|
||||
|
||||
EXTRACTORS = [(extractor[0], extractor[0]) for extractor in get_default_archive_methods()]
|
||||
STATUS_CHOICES = [
|
||||
("succeeded", "succeeded"),
|
||||
("failed", "failed"),
|
||||
("skipped", "skipped")
|
||||
]
|
||||
|
||||
try:
|
||||
JSONField = models.JSONField
|
||||
except AttributeError:
|
||||
import jsonfield
|
||||
JSONField = jsonfield.JSONField
|
||||
|
||||
|
||||
class Tag(models.Model):
|
||||
|
@ -51,6 +66,7 @@ class Tag(models.Model):
|
|||
else:
|
||||
return super().save(*args, **kwargs)
|
||||
|
||||
|
||||
class Snapshot(models.Model):
|
||||
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
|
||||
|
||||
|
@ -83,7 +99,7 @@ class Snapshot(models.Model):
|
|||
return {
|
||||
key: getattr(self, key)
|
||||
if key != 'tags' else self.tags_str()
|
||||
for key in args
|
||||
for key in args
|
||||
}
|
||||
|
||||
def as_link(self) -> Link:
|
||||
|
@ -92,7 +108,7 @@ class Snapshot(models.Model):
|
|||
def as_link_with_details(self) -> Link:
|
||||
from ..index import load_link_details
|
||||
return load_link_details(self.as_link())
|
||||
|
||||
|
||||
def tags_str(self) -> str:
|
||||
return ','.join(self.tags.order_by('name').values_list('name', flat=True))
|
||||
|
||||
|
@ -106,7 +122,7 @@ class Snapshot(models.Model):
|
|||
|
||||
@cached_property
|
||||
def num_outputs(self):
|
||||
return self.as_link().num_outputs
|
||||
return self.archiveresult_set.filter(status='succeeded').count()
|
||||
|
||||
@cached_property
|
||||
def url_hash(self):
|
||||
|
@ -130,8 +146,8 @@ class Snapshot(models.Model):
|
|||
|
||||
@cached_property
|
||||
def history(self):
|
||||
from ..index import load_link_details
|
||||
return load_link_details(self.as_link()).history
|
||||
# TODO: use ArchiveResult for this instead of json
|
||||
return self.as_link_with_details().history
|
||||
|
||||
@cached_property
|
||||
def latest_title(self):
|
||||
|
@ -142,9 +158,37 @@ class Snapshot(models.Model):
|
|||
return self.history['title'][-1].output.strip()
|
||||
return None
|
||||
|
||||
def save_tags(self, tags=[]):
|
||||
def save_tags(self, tags=()):
|
||||
tags_id = []
|
||||
for tag in tags:
|
||||
tags_id.append(Tag.objects.get_or_create(name=tag)[0].id)
|
||||
self.tags.clear()
|
||||
self.tags.add(*tags_id)
|
||||
|
||||
|
||||
class ArchiveResultManager(models.Manager):
    """Manager adding a helper for selecting results usable for indexing."""

    def indexable(self, sorted: bool = True):
        """Return succeeded results of the extractors listed for indexing.

        Only rows whose extractor appears in ARCHIVE_METHODS_INDEXING_PRECEDENCE
        and whose status is 'succeeded' are included. When ``sorted`` is true
        the queryset is annotated with ``indexing_precedence`` (the rank paired
        with each extractor, 1000 as the default) and ordered by it.
        """
        method_names = [entry[0] for entry in ARCHIVE_METHODS_INDEXING_PRECEDENCE]
        results = self.get_queryset().filter(extractor__in=method_names, status='succeeded')
        if not sorted:
            return results

        rank_cases = [
            When(extractor=name, then=Value(rank))
            for name, rank in ARCHIVE_METHODS_INDEXING_PRECEDENCE
        ]
        rank_expression = Case(*rank_cases, default=Value(1000), output_field=IntegerField())
        return results.annotate(indexing_precedence=rank_expression).order_by('indexing_precedence')
|
||||
|
||||
|
||||
class ArchiveResult(models.Model):
    """Database record of one extractor result belonging to a Snapshot."""

    # Owning snapshot; rows are removed together with it (CASCADE).
    snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
    # JSON column (models.JSONField when available, else jsonfield.JSONField).
    # Presumably the extractor command line - TODO confirm the stored shape.
    cmd = JSONField()
    pwd = models.CharField(max_length=256)
    cmd_version = models.CharField(max_length=32)
    output = models.CharField(max_length=512)
    start_ts = models.DateTimeField()
    end_ts = models.DateTimeField()
    # Restricted to STATUS_CHOICES: succeeded / failed / skipped.
    status = models.CharField(max_length=16, choices=STATUS_CHOICES)
    # Restricted to the names returned by get_default_archive_methods().
    extractor = models.CharField(choices=EXTRACTORS, max_length=32)

    # Custom manager providing .indexable().
    objects = ArchiveResultManager()

    def __str__(self):
        """Display the result by the name of the extractor that produced it."""
        return self.extractor
|
||||
|
|
|
@ -12,6 +12,7 @@ from ..config import (
|
|||
ALLOWED_HOSTS,
|
||||
PACKAGE_DIR,
|
||||
ACTIVE_THEME,
|
||||
TEMPLATES_DIR_NAME,
|
||||
SQL_INDEX_FILENAME,
|
||||
OUTPUT_DIR,
|
||||
)
|
||||
|
@ -68,14 +69,14 @@ AUTHENTICATION_BACKENDS = [
|
|||
STATIC_URL = '/static/'
|
||||
|
||||
STATICFILES_DIRS = [
|
||||
str(Path(PACKAGE_DIR) / 'themes' / ACTIVE_THEME / 'static'),
|
||||
str(Path(PACKAGE_DIR) / 'themes' / 'default' / 'static'),
|
||||
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / ACTIVE_THEME / 'static'),
|
||||
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'default' / 'static'),
|
||||
]
|
||||
|
||||
TEMPLATE_DIRS = [
|
||||
str(Path(PACKAGE_DIR) / 'themes' / ACTIVE_THEME),
|
||||
str(Path(PACKAGE_DIR) / 'themes' / 'default'),
|
||||
str(Path(PACKAGE_DIR) / 'themes'),
|
||||
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / ACTIVE_THEME),
|
||||
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'default'),
|
||||
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME),
|
||||
]
|
||||
|
||||
TEMPLATES = [
|
||||
|
@ -100,10 +101,12 @@ TEMPLATES = [
|
|||
################################################################################
|
||||
|
||||
DATABASE_FILE = Path(OUTPUT_DIR) / SQL_INDEX_FILENAME
|
||||
DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", DATABASE_FILE)
|
||||
|
||||
DATABASES = {
|
||||
'default': {
|
||||
'ENGINE': 'django.db.backends.sqlite3',
|
||||
'NAME': str(DATABASE_FILE),
|
||||
'NAME': DATABASE_NAME,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
0
archivebox/core/templatetags/__init__.py
Normal file
47
archivebox/core/templatetags/core_tags.py
Normal file
|
@ -0,0 +1,47 @@
|
|||
from django import template
|
||||
from django.urls import reverse
|
||||
from django.contrib.admin.templatetags.base import InclusionAdminNode
|
||||
from django.templatetags.static import static
|
||||
|
||||
|
||||
from typing import Union
|
||||
|
||||
from core.models import ArchiveResult
|
||||
|
||||
register = template.Library()
|
||||
|
||||
@register.simple_tag
|
||||
def snapshot_image(snapshot):
    """Return the URL of the image to show for ``snapshot``.

    Uses the first succeeded 'screenshot' ArchiveResult of the snapshot and
    reverses the 'LinkAssets' route for ``<timestamp>/<output>``. When no such
    result exists, the static 'archive.png' placeholder URL is returned.
    """
    screenshot = (
        ArchiveResult.objects
        .filter(snapshot=snapshot, extractor='screenshot', status='succeeded')
        .first()
    )
    if not screenshot:
        return static('archive.png')

    asset_path = f'{str(snapshot.timestamp)}/{screenshot.output}'
    return reverse('LinkAssets', args=[asset_path])
|
||||
|
||||
@register.filter
|
||||
def file_size(num_bytes: Union[int, float]) -> str:
    """Format a byte count as a human-readable string with one decimal place.

    The value is divided by 1024 until its magnitude drops below 1024, moving
    through 'Bytes', 'KB', 'MB' and 'GB'; anything still too large after 'GB'
    is reported in 'TB'.
    """
    remaining = num_bytes
    for unit in ('Bytes', 'KB', 'MB', 'GB'):
        if -1024.0 < remaining < 1024.0:
            return '%3.1f %s' % (remaining, unit)
        remaining = remaining / 1024.0
    return '%3.1f %s' % (remaining, 'TB')
|
||||
|
||||
def result_list(cl):
    """Build the template context for the snapshots grid.

    Monkey-patched replacement for the admin result list: it exposes the
    changelist itself, a fixed ``num_sorted_fields`` of 0, and the changelist's
    ``result_list`` under the key 'results'.
    """
    context = dict(cl=cl, num_sorted_fields=0)
    context['results'] = cl.result_list
    return context
|
||||
|
||||
@register.tag(name='snapshots_grid')
|
||||
def result_list_tag(parser, token):
    """Compile the tag into an admin inclusion node for the snapshots grid.

    The node renders 'snapshots_grid.html' with the context produced by
    ``result_list`` and does not receive the surrounding template context.
    """
    node_options = dict(
        func=result_list,
        template_name='snapshots_grid.html',
        takes_context=False,
    )
    return InclusionAdminNode(parser, token, **node_options)
|
|
@ -1,39 +0,0 @@
|
|||
from pathlib import Path
|
||||
|
||||
from django.utils.html import format_html
|
||||
|
||||
from core.models import Snapshot
|
||||
|
||||
|
||||
def get_icons(snapshot: Snapshot) -> str:
|
||||
link = snapshot.as_link()
|
||||
canon = link.canonical_outputs()
|
||||
out_dir = Path(link.link_dir)
|
||||
|
||||
# slow version: highlights icons based on whether files exist or not for that output
|
||||
# link_tuple = lambda link, method: (link.archive_path, canon[method] or '', canon[method] and (out_dir / (canon[method] or 'notdone')).exists())
|
||||
# fast version: all icons are highlighted without checking for outputs in filesystem
|
||||
link_tuple = lambda link, method: (link.archive_path, canon[method] or '', canon[method] and (out_dir / (canon[method] or 'notdone')).exists())
|
||||
|
||||
return format_html(
|
||||
'<span class="files-icons" style="font-size: 1.2em; opacity: 0.8">'
|
||||
'<a href="/{}/{}" class="exists-{}" title="SingleFile">❶ </a>'
|
||||
'<a href="/{}/{}" class="exists-{}" title="Wget clone">🆆 </a> '
|
||||
'<a href="/{}/{}" class="exists-{}" title="HTML dump">🅷 </a> '
|
||||
'<a href="/{}/{}" class="exists-{}" title="PDF">📄 </a> '
|
||||
'<a href="/{}/{}" class="exists-{}" title="Screenshot">💻 </a> '
|
||||
'<a href="/{}/{}" class="exists-{}" title="WARC">📦 </a> '
|
||||
'<a href="/{}/{}/" class="exists-{}" title="Media files">📼 </a> '
|
||||
'<a href="/{}/{}/" class="exists-{}" title="Git repos">🅶 </a> '
|
||||
'<a href="{}" class="exists-{}" title="Archive.org snapshot">🏛 </a> '
|
||||
'</span>',
|
||||
*link_tuple(link, 'singlefile_path'),
|
||||
*link_tuple(link, 'wget_path')[:2], any((out_dir / link.domain).glob('*')),
|
||||
*link_tuple(link, 'dom_path'),
|
||||
*link_tuple(link, 'pdf_path'),
|
||||
*link_tuple(link, 'screenshot_path'),
|
||||
*link_tuple(link, 'warc_path')[:2], any((out_dir / canon['warc_path']).glob('*.warc.gz')),
|
||||
*link_tuple(link, 'media_path')[:2], any((out_dir / canon['media_path']).glob('*')),
|
||||
*link_tuple(link, 'git_path')[:2], any((out_dir / canon['git_path']).glob('*')),
|
||||
canon['archive_org_path'], (out_dir / 'archive.org.txt').exists(),
|
||||
)
|
|
@ -1,113 +0,0 @@
|
|||
# Taken from https://github.com/jazzband/django-taggit/blob/3b56adb637ab95aca5036c37a358402c825a367c/taggit/utils.py
|
||||
|
||||
def parse_tags(tagstring):
|
||||
"""
|
||||
Parses tag input, with multiple word input being activated and
|
||||
delineated by commas and double quotes. Quotes take precedence, so
|
||||
they may contain commas.
|
||||
|
||||
Returns a sorted list of unique tag names.
|
||||
|
||||
Ported from Jonathan Buchanan's `django-tagging
|
||||
<http://django-tagging.googlecode.com/>`_
|
||||
"""
|
||||
if not tagstring:
|
||||
return []
|
||||
|
||||
# Special case - if there are no commas or double quotes in the
|
||||
# input, we don't *do* a recall... I mean, we know we only need to
|
||||
# split on spaces.
|
||||
if "," not in tagstring and '"' not in tagstring:
|
||||
words = list(set(split_strip(tagstring, " ")))
|
||||
words.sort()
|
||||
return words
|
||||
|
||||
words = []
|
||||
buffer = []
|
||||
# Defer splitting of non-quoted sections until we know if there are
|
||||
# any unquoted commas.
|
||||
to_be_split = []
|
||||
saw_loose_comma = False
|
||||
open_quote = False
|
||||
i = iter(tagstring)
|
||||
try:
|
||||
while True:
|
||||
c = next(i)
|
||||
if c == '"':
|
||||
if buffer:
|
||||
to_be_split.append("".join(buffer))
|
||||
buffer = []
|
||||
# Find the matching quote
|
||||
open_quote = True
|
||||
c = next(i)
|
||||
while c != '"':
|
||||
buffer.append(c)
|
||||
c = next(i)
|
||||
if buffer:
|
||||
word = "".join(buffer).strip()
|
||||
if word:
|
||||
words.append(word)
|
||||
buffer = []
|
||||
open_quote = False
|
||||
else:
|
||||
if not saw_loose_comma and c == ",":
|
||||
saw_loose_comma = True
|
||||
buffer.append(c)
|
||||
except StopIteration:
|
||||
# If we were parsing an open quote which was never closed treat
|
||||
# the buffer as unquoted.
|
||||
if buffer:
|
||||
if open_quote and "," in buffer:
|
||||
saw_loose_comma = True
|
||||
to_be_split.append("".join(buffer))
|
||||
if to_be_split:
|
||||
if saw_loose_comma:
|
||||
delimiter = ","
|
||||
else:
|
||||
delimiter = " "
|
||||
for chunk in to_be_split:
|
||||
words.extend(split_strip(chunk, delimiter))
|
||||
words = list(set(words))
|
||||
words.sort()
|
||||
return words
|
||||
|
||||
|
||||
def split_strip(string, delimiter=","):
|
||||
"""
|
||||
Splits ``string`` on ``delimiter``, stripping each resulting string
|
||||
and returning a list of non-empty strings.
|
||||
|
||||
Ported from Jonathan Buchanan's `django-tagging
|
||||
<http://django-tagging.googlecode.com/>`_
|
||||
"""
|
||||
if not string:
|
||||
return []
|
||||
|
||||
words = [w.strip() for w in string.split(delimiter)]
|
||||
return [w for w in words if w]
|
||||
|
||||
|
||||
def edit_string_for_tags(tags):
|
||||
"""
|
||||
Given list of ``Tag`` instances, creates a string representation of
|
||||
the list suitable for editing by the user, such that submitting the
|
||||
given string representation back without changing it will give the
|
||||
same list of tags.
|
||||
|
||||
Tag names which contain commas will be double quoted.
|
||||
|
||||
If any tag name which isn't being quoted contains whitespace, the
|
||||
resulting string of tag names will be comma-delimited, otherwise
|
||||
it will be space-delimited.
|
||||
|
||||
Ported from Jonathan Buchanan's `django-tagging
|
||||
<http://django-tagging.googlecode.com/>`_
|
||||
"""
|
||||
names = []
|
||||
for tag in tags:
|
||||
name = tag.name
|
||||
if "," in name or " " in name:
|
||||
names.append('"%s"' % name)
|
||||
else:
|
||||
names.append(name)
|
||||
return ", ".join(sorted(names))
|
|
@ -12,17 +12,19 @@ from django.views.generic import FormView
|
|||
from django.contrib.auth.mixins import UserPassesTestMixin
|
||||
|
||||
from core.models import Snapshot
|
||||
from core.utils import get_icons
|
||||
from core.forms import AddLinkForm
|
||||
|
||||
from ..config import (
|
||||
OUTPUT_DIR,
|
||||
PUBLIC_INDEX,
|
||||
PUBLIC_SNAPSHOTS,
|
||||
PUBLIC_ADD_VIEW
|
||||
PUBLIC_ADD_VIEW,
|
||||
VERSION,
|
||||
FOOTER_INFO,
|
||||
)
|
||||
from main import add
|
||||
from ..util import base_url, ansi_to_html
|
||||
from ..index.html import snapshot_icons
|
||||
|
||||
|
||||
class MainIndex(View):
|
||||
|
@ -94,13 +96,20 @@ class PublicArchiveView(ListView):
|
|||
paginate_by = 100
|
||||
ordering = ['title']
|
||||
|
||||
def get_context_data(self, **kwargs):
    """Extend the parent context with the VERSION and FOOTER_INFO config values."""
    context = dict(super().get_context_data(**kwargs))
    context['VERSION'] = VERSION
    context['FOOTER_INFO'] = FOOTER_INFO
    return context
|
||||
|
||||
def get_queryset(self, **kwargs):
|
||||
qs = super().get_queryset(**kwargs)
|
||||
query = self.request.GET.get('q')
|
||||
if query:
|
||||
qs = qs.filter(title__icontains=query)
|
||||
for snapshot in qs:
|
||||
snapshot.icons = get_icons(snapshot)
|
||||
snapshot.icons = snapshot_icons(snapshot)
|
||||
return qs
|
||||
|
||||
def get(self, *args, **kwargs):
|
||||
|
@ -127,23 +136,29 @@ class AddView(UserPassesTestMixin, FormView):
|
|||
def test_func(self):
|
||||
return PUBLIC_ADD_VIEW or self.request.user.is_authenticated
|
||||
|
||||
def get_context_data(self, *args, **kwargs):
|
||||
context = super().get_context_data(*args, **kwargs)
|
||||
context["title"] = "Add URLs"
|
||||
# We can't just call request.build_absolute_uri in the template, because it would include query parameters
|
||||
context["absolute_add_path"] = self.request.build_absolute_uri(self.request.path)
|
||||
return context
|
||||
def get_context_data(self, **kwargs):
    """
    Extend the parent context with the page title, the absolute URL of the
    add view, and the VERSION / FOOTER_INFO config values.
    """
    context = dict(super().get_context_data(**kwargs))
    context['title'] = "Add URLs"
    # We can't just call request.build_absolute_uri in the template, because it would include query parameters
    context['absolute_add_path'] = self.request.build_absolute_uri(self.request.path)
    context['VERSION'] = VERSION
    context['FOOTER_INFO'] = FOOTER_INFO
    return context
|
||||
|
||||
def form_valid(self, form):
|
||||
url = form.cleaned_data["url"]
|
||||
print(f'[+] Adding URL: {url}')
|
||||
depth = 0 if form.cleaned_data["depth"] == "0" else 1
|
||||
extractors = ','.join(form.cleaned_data["archive_methods"])
|
||||
input_kwargs = {
|
||||
"urls": url,
|
||||
"depth": depth,
|
||||
"update_all": False,
|
||||
"out_dir": OUTPUT_DIR,
|
||||
}
|
||||
if extractors:
|
||||
input_kwargs.update({"extractors": extractors})
|
||||
add_stdout = StringIO()
|
||||
with redirect_stdout(add_stdout):
|
||||
add(**input_kwargs)
|
||||
|
|
|
@ -8,6 +8,7 @@ from datetime import datetime
|
|||
from django.db.models import QuerySet
|
||||
|
||||
from ..index.schema import Link
|
||||
from ..index.sql import write_link_to_sql_index
|
||||
from ..index import (
|
||||
load_link_details,
|
||||
write_link_details,
|
||||
|
@ -22,6 +23,7 @@ from ..logging_util import (
|
|||
log_archive_method_started,
|
||||
log_archive_method_finished,
|
||||
)
|
||||
from ..search import write_search_index
|
||||
|
||||
from .title import should_save_title, save_title
|
||||
from .favicon import should_save_favicon, save_favicon
|
||||
|
@ -37,6 +39,7 @@ from .media import should_save_media, save_media
|
|||
from .archive_org import should_save_archive_dot_org, save_archive_dot_org
|
||||
from .headers import should_save_headers, save_headers
|
||||
|
||||
|
||||
def get_default_archive_methods():
|
||||
return [
|
||||
('title', should_save_title, save_title),
|
||||
|
@ -54,6 +57,8 @@ def get_default_archive_methods():
|
|||
('archive_org', should_save_archive_dot_org, save_archive_dot_org),
|
||||
]
|
||||
|
||||
ARCHIVE_METHODS_INDEXING_PRECEDENCE = [('readability', 1), ('singlefile', 2), ('dom', 3), ('wget', 4)]
|
||||
|
||||
@enforce_types
|
||||
def ignore_methods(to_ignore: List[str]):
|
||||
ARCHIVE_METHODS = get_default_archive_methods()
|
||||
|
@ -62,9 +67,16 @@ def ignore_methods(to_ignore: List[str]):
|
|||
return list(methods)
|
||||
|
||||
@enforce_types
|
||||
def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[str]]=None, out_dir: Optional[Path]=None, skip_index: bool=False) -> Link:
|
||||
def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[str]]=None, out_dir: Optional[Path]=None) -> Link:
|
||||
"""download the DOM, PDF, and a screenshot into a folder named after the link's timestamp"""
|
||||
|
||||
# TODO: Remove when the input is changed to be a snapshot. Suboptimal approach.
|
||||
from core.models import Snapshot, ArchiveResult
|
||||
try:
|
||||
snapshot = Snapshot.objects.get(url=link.url) # TODO: This will be unnecessary once everything is a snapshot
|
||||
except Snapshot.DoesNotExist:
|
||||
snapshot = write_link_to_sql_index(link)
|
||||
|
||||
ARCHIVE_METHODS = get_default_archive_methods()
|
||||
|
||||
if methods:
|
||||
|
@ -80,7 +92,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
|
|||
os.makedirs(out_dir)
|
||||
|
||||
link = load_link_details(link, out_dir=out_dir)
|
||||
write_link_details(link, out_dir=out_dir, skip_sql_index=skip_index)
|
||||
write_link_details(link, out_dir=out_dir, skip_sql_index=False)
|
||||
log_link_archiving_started(link, out_dir, is_new)
|
||||
link = link.overwrite(updated=datetime.now())
|
||||
stats = {'skipped': 0, 'succeeded': 0, 'failed': 0}
|
||||
|
@ -99,6 +111,10 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
|
|||
|
||||
stats[result.status] += 1
|
||||
log_archive_method_finished(result)
|
||||
write_search_index(link=link, texts=result.index_texts)
|
||||
ArchiveResult.objects.create(snapshot=snapshot, extractor=method_name, cmd=result.cmd, cmd_version=result.cmd_version,
|
||||
output=result.output, pwd=result.pwd, start_ts=result.start_ts, end_ts=result.end_ts, status=result.status)
|
||||
|
||||
else:
|
||||
# print('{black} X {}{reset}'.format(method_name, **ANSI))
|
||||
stats['skipped'] += 1
|
||||
|
@ -117,7 +133,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
write_link_details(link, out_dir=out_dir, skip_sql_index=skip_index)
|
||||
write_link_details(link, out_dir=out_dir, skip_sql_index=False)
|
||||
|
||||
log_link_archiving_finished(link, link.link_dir, is_new, stats)
|
||||
|
||||
|
|
|
@ -71,6 +71,7 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
|
|||
CURL_BINARY,
|
||||
link.url
|
||||
]
|
||||
readability_content = None
|
||||
timer = TimedProgress(timeout, prefix=' ')
|
||||
try:
|
||||
document = get_html(link, out_dir)
|
||||
|
@ -86,8 +87,9 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
|
|||
result = run(cmd, cwd=out_dir, timeout=timeout)
|
||||
result_json = json.loads(result.stdout)
|
||||
output_folder.mkdir(exist_ok=True)
|
||||
readability_content = result_json.pop("textContent")
|
||||
atomic_write(str(output_folder / "content.html"), result_json.pop("content"))
|
||||
atomic_write(str(output_folder / "content.txt"), result_json.pop("textContent"))
|
||||
atomic_write(str(output_folder / "content.txt"), readability_content)
|
||||
atomic_write(str(output_folder / "article.json"), result_json)
|
||||
|
||||
# parse out number of files downloaded from last line of stderr:
|
||||
|
@ -117,5 +119,6 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
|
|||
cmd_version=READABILITY_VERSION,
|
||||
output=output,
|
||||
status=status,
|
||||
**timer.stats,
|
||||
index_texts= [readability_content] if readability_content else [],
|
||||
**timer.stats,
|
||||
)
|
||||
|
|
|
@ -20,7 +20,6 @@ from ..config import (
|
|||
CURL_ARGS,
|
||||
CURL_VERSION,
|
||||
CURL_USER_AGENT,
|
||||
setup_django,
|
||||
)
|
||||
from ..logging_util import TimedProgress
|
||||
|
||||
|
@ -81,7 +80,6 @@ def extract_title_with_regex(html):
|
|||
def save_title(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
|
||||
"""try to guess the page's title from its content"""
|
||||
|
||||
setup_django(out_dir=out_dir)
|
||||
from core.models import Snapshot
|
||||
|
||||
output: ArchiveOutput = None
|
||||
|
|
|
@ -18,7 +18,6 @@ from ..util import (
|
|||
ExtendedEncoder,
|
||||
)
|
||||
from ..config import (
|
||||
setup_django,
|
||||
ARCHIVE_DIR_NAME,
|
||||
SQL_INDEX_FILENAME,
|
||||
JSON_INDEX_FILENAME,
|
||||
|
@ -51,6 +50,8 @@ from .sql import (
|
|||
write_sql_link_details,
|
||||
)
|
||||
|
||||
from ..search import search_backend_enabled, query_search_index
|
||||
|
||||
### Link filtering and checking
|
||||
|
||||
@enforce_types
|
||||
|
@ -221,7 +222,7 @@ def timed_index_update(out_path: Path):
|
|||
|
||||
|
||||
@enforce_types
|
||||
def write_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, finished: bool=False) -> None:
|
||||
def write_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None:
|
||||
"""Writes links to sqlite3 file for a given list of links"""
|
||||
|
||||
log_indexing_process_started(len(links))
|
||||
|
@ -241,16 +242,9 @@ def write_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, finished: bool
|
|||
|
||||
log_indexing_process_finished()
|
||||
|
||||
@enforce_types
|
||||
def get_empty_snapshot_queryset(out_dir: Path=OUTPUT_DIR):
|
||||
setup_django(out_dir, check_db=True)
|
||||
from core.models import Snapshot
|
||||
return Snapshot.objects.none()
|
||||
|
||||
@enforce_types
|
||||
def load_main_index(out_dir: Path=OUTPUT_DIR, warn: bool=True) -> List[Link]:
|
||||
"""parse and load existing index with any new links from import_path merged in"""
|
||||
setup_django(out_dir, check_db=True)
|
||||
from core.models import Snapshot
|
||||
try:
|
||||
return Snapshot.objects.all()
|
||||
|
@ -365,7 +359,7 @@ LINK_FILTERS = {
|
|||
}
|
||||
|
||||
@enforce_types
|
||||
def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
|
||||
def q_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
|
||||
q_filter = Q()
|
||||
for pattern in filter_patterns:
|
||||
try:
|
||||
|
@ -380,10 +374,36 @@ def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type
|
|||
raise SystemExit(2)
|
||||
return snapshots.filter(q_filter)
|
||||
|
||||
def search_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='search') -> QuerySet:
    """
    Restrict ``snapshots`` to those matched by the search backend.

    Each pattern in ``filter_patterns`` is passed to ``query_search_index``;
    the per-pattern results are OR-ed together and then intersected with
    ``snapshots``. ``filter_type`` is accepted for signature parity with
    ``q_filter`` and is not used here.

    Raises:
        SystemExit(2): if the search backend is disabled, or if a query fails.
    """
    if not search_backend_enabled():
        stderr()
        stderr(
            '[X] The search backend is not enabled, set config.USE_SEARCHING_BACKEND = True',
            color='red',
        )
        raise SystemExit(2)

    from core.models import Snapshot

    qsearch = Snapshot.objects.none()
    for pattern in filter_patterns:
        try:
            qsearch |= query_search_index(pattern)
        except Exception as err:
            # Catch Exception rather than using a bare except, so that
            # KeyboardInterrupt/SystemExit propagate untouched, and report
            # the failure instead of exiting silently.
            stderr()
            stderr(
                f'[X] Search query failed for pattern {pattern!r}: {err.__class__.__name__}: {err}',
                color='red',
            )
            raise SystemExit(2) from err

    return snapshots & qsearch
|
||||
|
||||
@enforce_types
def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
    """
    Filter ``snapshots`` by ``filter_patterns``.

    ``filter_type == 'search'`` goes to ``search_filter``; every other
    filter type goes to ``q_filter``.
    """
    handler = search_filter if filter_type == 'search' else q_filter
    return handler(snapshots, filter_patterns, filter_type)
|
||||
|
||||
|
||||
def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||
"""indexed links without checking archive status or data directory validity"""
|
||||
links = [snapshot.as_link() for snapshot in snapshots.iterator()]
|
||||
links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
|
||||
return {
|
||||
link.link_dir: link
|
||||
for link in links
|
||||
|
@ -391,7 +411,7 @@ def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Option
|
|||
|
||||
def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||
"""indexed links that are archived with a valid data directory"""
|
||||
links = [snapshot.as_link() for snapshot in snapshots.iterator()]
|
||||
links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
|
||||
return {
|
||||
link.link_dir: link
|
||||
for link in filter(is_archived, links)
|
||||
|
@ -399,7 +419,7 @@ def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optio
|
|||
|
||||
def get_unarchived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||
"""indexed links that are unarchived with no data directory or an empty data directory"""
|
||||
links = [snapshot.as_link() for snapshot in snapshots.iterator()]
|
||||
links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
|
||||
return {
|
||||
link.link_dir: link
|
||||
for link in filter(is_unarchived, links)
|
||||
|
@ -424,7 +444,7 @@ def get_present_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Option
|
|||
|
||||
def get_valid_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||
"""dirs with a valid index matched to the main index and archived content"""
|
||||
links = [snapshot.as_link() for snapshot in snapshots.iterator()]
|
||||
links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
|
||||
return {
|
||||
link.link_dir: link
|
||||
for link in filter(is_valid, links)
|
||||
|
|
|
@ -1,12 +1,14 @@
|
|||
__package__ = 'archivebox.index'
|
||||
|
||||
from string import Template
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Iterator, Mapping
|
||||
from pathlib import Path
|
||||
|
||||
from django.utils.html import format_html
|
||||
from collections import defaultdict
|
||||
|
||||
from .schema import Link
|
||||
from ..system import atomic_write, copy_and_overwrite
|
||||
from ..system import atomic_write
|
||||
from ..logging_util import printable_filesize
|
||||
from ..util import (
|
||||
enforce_types,
|
||||
|
@ -17,21 +19,15 @@ from ..util import (
|
|||
)
|
||||
from ..config import (
|
||||
OUTPUT_DIR,
|
||||
TEMPLATES_DIR,
|
||||
VERSION,
|
||||
GIT_SHA,
|
||||
FOOTER_INFO,
|
||||
ARCHIVE_DIR_NAME,
|
||||
HTML_INDEX_FILENAME,
|
||||
STATIC_DIR_NAME,
|
||||
ROBOTS_TXT_FILENAME,
|
||||
FAVICON_FILENAME,
|
||||
)
|
||||
|
||||
MAIN_INDEX_TEMPLATE = str(Path(TEMPLATES_DIR) / 'main_index.html')
|
||||
MINIMAL_INDEX_TEMPLATE = str(Path(TEMPLATES_DIR) / 'main_index_minimal.html')
|
||||
MAIN_INDEX_ROW_TEMPLATE = str(Path(TEMPLATES_DIR) / 'main_index_row.html')
|
||||
LINK_DETAILS_TEMPLATE = str(Path(TEMPLATES_DIR) / 'link_details.html')
|
||||
MAIN_INDEX_TEMPLATE = 'main_index.html'
|
||||
MINIMAL_INDEX_TEMPLATE = 'main_index_minimal.html'
|
||||
LINK_DETAILS_TEMPLATE = 'link_details.html'
|
||||
TITLE_LOADING_MSG = 'Not yet archived...'
|
||||
|
||||
|
||||
|
@ -50,62 +46,25 @@ def parse_html_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[str]:
|
|||
return ()
|
||||
|
||||
@enforce_types
|
||||
def write_html_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, finished: bool=False) -> None:
|
||||
"""write the html link index to a given path"""
|
||||
|
||||
copy_and_overwrite(str(Path(TEMPLATES_DIR) / FAVICON_FILENAME), str(out_dir / FAVICON_FILENAME))
|
||||
copy_and_overwrite(str(Path(TEMPLATES_DIR) / ROBOTS_TXT_FILENAME), str(out_dir / ROBOTS_TXT_FILENAME))
|
||||
copy_and_overwrite(str(Path(TEMPLATES_DIR) / STATIC_DIR_NAME), str(out_dir / STATIC_DIR_NAME))
|
||||
|
||||
rendered_html = main_index_template(links, finished=finished)
|
||||
atomic_write(str(out_dir / HTML_INDEX_FILENAME), rendered_html)
|
||||
|
||||
def generate_index_from_links(links: List[Link], with_headers: bool):
    """
    Render the HTML index for ``links``.

    Uses the default main index template when ``with_headers`` is true and
    the minimal template otherwise.
    """
    if with_headers:
        return main_index_template(links)
    return main_index_template(links, template=MINIMAL_INDEX_TEMPLATE)
|
||||
|
||||
@enforce_types
|
||||
def main_index_template(links: List[Link], finished: bool=True, template: str=MAIN_INDEX_TEMPLATE) -> str:
|
||||
def main_index_template(links: List[Link], template: str=MAIN_INDEX_TEMPLATE) -> str:
|
||||
"""render the template for the entire main index"""
|
||||
|
||||
return render_legacy_template(template, {
|
||||
return render_django_template(template, {
|
||||
'version': VERSION,
|
||||
'git_sha': GIT_SHA,
|
||||
'num_links': str(len(links)),
|
||||
'status': 'finished' if finished else 'running',
|
||||
'date_updated': datetime.now().strftime('%Y-%m-%d'),
|
||||
'time_updated': datetime.now().strftime('%Y-%m-%d %H:%M'),
|
||||
'rows': '\n'.join(
|
||||
main_index_row_template(link)
|
||||
for link in links
|
||||
),
|
||||
'footer_info': FOOTER_INFO,
|
||||
})
|
||||
|
||||
|
||||
@enforce_types
|
||||
def main_index_row_template(link: Link) -> str:
|
||||
"""render the template for an individual link row of the main index"""
|
||||
|
||||
from ..extractors.wget import wget_output_path
|
||||
|
||||
return render_legacy_template(MAIN_INDEX_ROW_TEMPLATE, {
|
||||
**link._asdict(extended=True),
|
||||
|
||||
# before pages are finished archiving, show loading msg instead of title
|
||||
'title': htmlencode(
|
||||
link.title
|
||||
or (link.base_url if link.is_archived else TITLE_LOADING_MSG)
|
||||
),
|
||||
|
||||
# before pages are finished archiving, show fallback loading favicon
|
||||
'favicon_url': (
|
||||
str(Path(ARCHIVE_DIR_NAME) / link.timestamp / 'favicon.ico')
|
||||
# if link['is_archived'] else 'data:image/gif;base64,R0lGODlhAQABAAD/ACwAAAAAAQABAAACADs='
|
||||
),
|
||||
|
||||
# before pages are finished archiving, show the details page instead
|
||||
'wget_url': urlencode(wget_output_path(link) or 'index.html'),
|
||||
|
||||
# replace commas in tags with spaces, or file extension if it's static
|
||||
'tags': (link.tags or '') + (' {}'.format(link.extension) if link.is_static else ''),
|
||||
'links': [link._asdict(extended=True) for link in links],
|
||||
'FOOTER_INFO': FOOTER_INFO,
|
||||
})
|
||||
|
||||
|
||||
|
@ -126,7 +85,7 @@ def link_details_template(link: Link) -> str:
|
|||
|
||||
link_info = link._asdict(extended=True)
|
||||
|
||||
return render_legacy_template(LINK_DETAILS_TEMPLATE, {
|
||||
return render_django_template(LINK_DETAILS_TEMPLATE, {
|
||||
**link_info,
|
||||
**link_info['canonical'],
|
||||
'title': htmlencode(
|
||||
|
@ -146,12 +105,60 @@ def link_details_template(link: Link) -> str:
|
|||
'oldest_archive_date': ts_to_date(link.oldest_archive_date),
|
||||
})
|
||||
|
||||
|
||||
@enforce_types
|
||||
def render_legacy_template(template_path: str, context: Mapping[str, str]) -> str:
|
||||
def render_django_template(template: str, context: Mapping[str, str]) -> str:
|
||||
"""render a given html template string with the given template content"""
|
||||
from django.template.loader import render_to_string
|
||||
|
||||
# will be replaced by django templates in the future
|
||||
with open(template_path, 'r', encoding='utf-8') as template:
|
||||
template_str = template.read()
|
||||
return Template(template_str).substitute(**context)
|
||||
return render_to_string(template, context)
|
||||
|
||||
|
||||
def snapshot_icons(snapshot) -> str:
    """
    Render the row of per-extractor icon links for ``snapshot`` as safe HTML.

    One anchor is emitted per extractor in ``core.models.EXTRACTORS`` (except
    those in ``exclude``). Its ``exists-True`` / ``exists-False`` class
    reflects whether the snapshot has a succeeded ArchiveResult for that
    extractor. A WARC anchor is added right after wget, and archive.org is
    handled separately by checking for ``archive.org.txt``.

    Each anchor is built with ``format_html`` so interpolated values are
    HTML-escaped, and braces inside paths can no longer break the final
    formatting step.
    """
    from core.models import EXTRACTORS
    from django.utils.safestring import mark_safe

    link = snapshot.as_link()
    path = link.archive_path
    canon = link.canonical_outputs()

    output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{} </a>'
    icons = {
        "singlefile": "❶",
        "wget": "🆆",
        "dom": "🅷",
        "pdf": "📄",
        "screenshot": "💻",
        "media": "📼",
        "git": "🅶",
        "archive_org": "🏛",
        "readability": "🆁",
        "mercury": "🅼",
        "warc": "📦"
    }
    exclude = ["favicon", "title", "headers", "archive_org"]
    # Missing specific entry for WARC

    # Names of extractors with at least one succeeded result, gathered in a single pass.
    succeeded = {
        result.extractor
        for result in snapshot.archiveresult_set.filter(status="succeeded")
    }

    pieces = []
    for extractor, _ in EXTRACTORS:
        if extractor not in exclude:
            pieces.append(format_html(
                output_template,
                path, canon[f"{extractor}_path"], str(extractor in succeeded),
                extractor, icons.get(extractor, "?"),
            ))

        if extractor == "wget":
            # warc isn't technically its own extractor, so we have to add it after wget
            warc_files = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
            pieces.append(format_html(
                output_template,
                warc_files[0] if warc_files else '#', canon["warc_path"], str(bool(warc_files)),
                "warc", icons.get("warc", "?"),
            ))

        if extractor == "archive_org":
            # The check for archive_org is different, so it has to be handled separately
            exists = (Path(path) / "archive.org.txt").exists()
            pieces.append(format_html(
                '<a href="{}" class="exists-{}" title="{}">{}</a> ',
                canon["archive_org_path"], str(exists),
                "archive_org", icons.get("archive_org", "?"),
            ))

    # Every piece is already escaped by format_html, so the joined string is safe.
    # The wrapper is now closed with </span> (the original emitted a second opening <span>).
    return format_html(
        '<span class="files-icons" style="font-size: 1.1em; opacity: 0.8">{}</span>',
        mark_safe(''.join(pieces)),
    )
|
||||
|
|
|
@ -8,7 +8,7 @@ from pathlib import Path
|
|||
from datetime import datetime
|
||||
from typing import List, Optional, Iterator, Any, Union
|
||||
|
||||
from .schema import Link, ArchiveResult
|
||||
from .schema import Link
|
||||
from ..system import atomic_write
|
||||
from ..util import enforce_types
|
||||
from ..config import (
|
||||
|
@ -39,7 +39,20 @@ MAIN_INDEX_HEADER = {
|
|||
},
|
||||
}
|
||||
|
||||
### Main Links Index
|
||||
@enforce_types
def generate_json_index_from_links(links: List[Link], with_headers: bool):
    """
    Serialize ``links`` to an indented, key-sorted JSON string.

    With ``with_headers`` the links are wrapped in the MAIN_INDEX_HEADER
    fields plus ``num_links``, ``updated``, ``last_run_cmd`` and ``links``.
    Without it, only the bare list of links is serialized.
    """
    if not with_headers:
        return to_json(links, indent=4, sort_keys=True)

    payload = dict(MAIN_INDEX_HEADER)
    payload['num_links'] = len(links)
    payload['updated'] = datetime.now()
    payload['last_run_cmd'] = sys.argv
    payload['links'] = links
    return to_json(payload, indent=4, sort_keys=True)
|
||||
|
||||
|
||||
@enforce_types
|
||||
def parse_json_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
|
||||
|
@ -65,30 +78,6 @@ def parse_json_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
|
|||
continue
|
||||
return ()
|
||||
|
||||
@enforce_types
|
||||
def write_json_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None:
|
||||
"""write the json link index to a given path"""
|
||||
|
||||
assert isinstance(links, List), 'Links must be a list, not a generator.'
|
||||
assert not links or isinstance(links[0].history, dict)
|
||||
assert not links or isinstance(links[0].sources, list)
|
||||
|
||||
if links and links[0].history.get('title'):
|
||||
assert isinstance(links[0].history['title'][0], ArchiveResult)
|
||||
|
||||
if links and links[0].sources:
|
||||
assert isinstance(links[0].sources[0], str)
|
||||
|
||||
main_index_json = {
|
||||
**MAIN_INDEX_HEADER,
|
||||
'num_links': len(links),
|
||||
'updated': datetime.now(),
|
||||
'last_run_cmd': sys.argv,
|
||||
'links': links,
|
||||
}
|
||||
atomic_write(str(Path(out_dir) / JSON_INDEX_FILENAME), main_index_json)
|
||||
|
||||
|
||||
### Link Details Index
|
||||
|
||||
@enforce_types
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
"""
|
||||
|
||||
WARNING: THIS FILE IS ALL LEGACY CODE TO BE REMOVED.
|
||||
|
||||
DO NOT ADD ANY NEW FEATURES TO THIS FILE, NEW CODE GOES HERE: core/models.py
|
||||
|
||||
"""
|
||||
|
||||
__package__ = 'archivebox.index'
|
||||
|
||||
from pathlib import Path
|
||||
|
@ -31,6 +39,7 @@ class ArchiveResult:
|
|||
status: str
|
||||
start_ts: datetime
|
||||
end_ts: datetime
|
||||
index_texts: Union[List[str], None] = None
|
||||
schema: str = 'ArchiveResult'
|
||||
|
||||
def __post_init__(self):
|
||||
|
@ -207,6 +216,10 @@ class Link:
|
|||
})
|
||||
return info
|
||||
|
||||
def as_snapshot(self):
    """Return the Snapshot row whose ``url`` equals this link's url (via ``Snapshot.objects.get``)."""
    from core.models import Snapshot

    snapshots = Snapshot.objects
    return snapshots.get(url=self.url)
|
||||
|
||||
@classmethod
|
||||
def from_json(cls, json_info, guess=False):
|
||||
from ..util import parse_date
|
||||
|
@ -339,7 +352,7 @@ class Link:
|
|||
### Archive Status Helpers
|
||||
@property
|
||||
def num_outputs(self) -> int:
|
||||
return len(tuple(filter(None, self.latest_outputs().values())))
|
||||
return self.as_snapshot().num_outputs
|
||||
|
||||
@property
|
||||
def num_failures(self) -> int:
|
||||
|
|
|
@ -4,17 +4,17 @@ from io import StringIO
|
|||
from pathlib import Path
|
||||
from typing import List, Tuple, Iterator
|
||||
from django.db.models import QuerySet
|
||||
from django.db import transaction
|
||||
|
||||
from .schema import Link
|
||||
from ..util import enforce_types
|
||||
from ..config import setup_django, OUTPUT_DIR
|
||||
from ..config import OUTPUT_DIR
|
||||
|
||||
|
||||
### Main Links Index
|
||||
|
||||
@enforce_types
|
||||
def parse_sql_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
|
||||
setup_django(out_dir, check_db=True)
|
||||
from core.models import Snapshot
|
||||
|
||||
return (
|
||||
|
@ -24,9 +24,6 @@ def parse_sql_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
|
|||
|
||||
@enforce_types
|
||||
def remove_from_sql_main_index(snapshots: QuerySet, out_dir: Path=OUTPUT_DIR) -> None:
|
||||
setup_django(out_dir, check_db=True)
|
||||
from django.db import transaction
|
||||
|
||||
with transaction.atomic():
|
||||
snapshots.delete()
|
||||
|
||||
|
@ -51,9 +48,6 @@ def write_link_to_sql_index(link: Link):
|
|||
|
||||
@enforce_types
|
||||
def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None:
|
||||
setup_django(out_dir, check_db=True)
|
||||
from django.db import transaction
|
||||
|
||||
with transaction.atomic():
|
||||
for link in links:
|
||||
write_link_to_sql_index(link)
|
||||
|
@ -61,9 +55,7 @@ def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None:
|
|||
|
||||
@enforce_types
|
||||
def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR) -> None:
|
||||
setup_django(out_dir, check_db=True)
|
||||
from core.models import Snapshot
|
||||
from django.db import transaction
|
||||
|
||||
with transaction.atomic():
|
||||
try:
|
||||
|
@ -84,7 +76,6 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR) -> None:
|
|||
|
||||
@enforce_types
|
||||
def list_migrations(out_dir: Path=OUTPUT_DIR) -> List[Tuple[bool, str]]:
|
||||
setup_django(out_dir, check_db=False)
|
||||
from django.core.management import call_command
|
||||
out = StringIO()
|
||||
call_command("showmigrations", list=True, stdout=out)
|
||||
|
@ -101,7 +92,6 @@ def list_migrations(out_dir: Path=OUTPUT_DIR) -> List[Tuple[bool, str]]:
|
|||
|
||||
@enforce_types
|
||||
def apply_migrations(out_dir: Path=OUTPUT_DIR) -> List[str]:
|
||||
setup_django(out_dir, check_db=False)
|
||||
from django.core.management import call_command
|
||||
null, out = StringIO(), StringIO()
|
||||
call_command("makemigrations", interactive=False, stdout=null)
|
||||
|
@ -112,6 +102,5 @@ def apply_migrations(out_dir: Path=OUTPUT_DIR) -> List[str]:
|
|||
|
||||
@enforce_types
|
||||
def get_admins(out_dir: Path=OUTPUT_DIR) -> List[str]:
|
||||
setup_django(out_dir, check_db=False)
|
||||
from django.contrib.auth.models import User
|
||||
return User.objects.filter(is_superuser=True)
|
||||
|
|
|
@ -19,6 +19,7 @@ if TYPE_CHECKING:
|
|||
from .util import enforce_types
|
||||
from .config import (
|
||||
ConfigDict,
|
||||
OUTPUT_DIR,
|
||||
PYTHON_ENCODING,
|
||||
ANSI,
|
||||
IS_TTY,
|
||||
|
@ -443,7 +444,7 @@ def log_shell_welcome_msg():
|
|||
from .cli import list_subcommands
|
||||
|
||||
print('{green}# ArchiveBox Imports{reset}'.format(**ANSI))
|
||||
print('{green}from archivebox.core.models import Snapshot, User{reset}'.format(**ANSI))
|
||||
print('{green}from core.models import Snapshot, User{reset}'.format(**ANSI))
|
||||
print('{green}from archivebox import *\n {}{reset}'.format("\n ".join(list_subcommands().keys()), **ANSI))
|
||||
print()
|
||||
print('[i] Welcome to the ArchiveBox Shell!')
|
||||
|
@ -477,39 +478,7 @@ def printable_filesize(num_bytes: Union[int, float]) -> str:
|
|||
|
||||
@enforce_types
|
||||
def printable_folders(folders: Dict[str, Optional["Link"]],
|
||||
json: bool=False,
|
||||
html: bool=False,
|
||||
csv: Optional[str]=None,
|
||||
with_headers: bool=False) -> str:
|
||||
|
||||
from .index.json import MAIN_INDEX_HEADER
|
||||
|
||||
links = folders.values()
|
||||
if json:
|
||||
from .index.json import to_json
|
||||
if with_headers:
|
||||
output = {
|
||||
**MAIN_INDEX_HEADER,
|
||||
'num_links': len(links),
|
||||
'updated': datetime.now(),
|
||||
'last_run_cmd': sys.argv,
|
||||
'links': links,
|
||||
}
|
||||
else:
|
||||
output = links
|
||||
return to_json(output, indent=4, sort_keys=True)
|
||||
elif html:
|
||||
from .index.html import main_index_template
|
||||
if with_headers:
|
||||
output = main_index_template(links, True)
|
||||
else:
|
||||
from .index.html import MINIMAL_INDEX_TEMPLATE
|
||||
output = main_index_template(links, True, MINIMAL_INDEX_TEMPLATE)
|
||||
return output
|
||||
elif csv:
|
||||
from .index.csv import links_to_csv
|
||||
return links_to_csv(folders.values(), cols=csv.split(','), header=with_headers)
|
||||
|
||||
return '\n'.join(
|
||||
f'{folder} {link and link.url} "{link and link.title}"'
|
||||
for folder, link in folders.items()
|
||||
|
@ -546,19 +515,24 @@ def printable_folder_status(name: str, folder: Dict) -> str:
|
|||
else:
|
||||
num_files = 'missing'
|
||||
|
||||
if ' ' in str(folder['path']):
|
||||
folder['path'] = f'"{folder["path"]}"'
|
||||
path = str(folder['path']).replace(str(OUTPUT_DIR), '.') if folder['path'] else ''
|
||||
if path and ' ' in path:
|
||||
path = f'"{path}"'
|
||||
|
||||
# if path is just a plain dot, replace it back with the full path for clarity
|
||||
if path == '.':
|
||||
path = str(OUTPUT_DIR)
|
||||
|
||||
return ' '.join((
|
||||
ANSI[color],
|
||||
symbol,
|
||||
ANSI['reset'],
|
||||
name.ljust(22),
|
||||
(str(folder["path"]) or '').ljust(76),
|
||||
name.ljust(21),
|
||||
num_files.ljust(14),
|
||||
ANSI[color],
|
||||
note,
|
||||
note.ljust(8),
|
||||
ANSI['reset'],
|
||||
path.ljust(76),
|
||||
))
|
||||
|
||||
|
||||
|
@ -578,17 +552,18 @@ def printable_dependency_version(name: str, dependency: Dict) -> str:
|
|||
else:
|
||||
color, symbol, note, version = 'lightyellow', '-', 'disabled', '-'
|
||||
|
||||
if ' ' in (dependency["path"] or ''):
|
||||
dependency["path"] = f'"{dependency["path"]}"'
|
||||
path = str(dependency["path"]).replace(str(OUTPUT_DIR), '.') if dependency["path"] else ''
|
||||
if path and ' ' in path:
|
||||
path = f'"{path}"'
|
||||
|
||||
return ' '.join((
|
||||
ANSI[color],
|
||||
symbol,
|
||||
ANSI['reset'],
|
||||
name.ljust(22),
|
||||
(dependency["path"] or '').ljust(76),
|
||||
name.ljust(21),
|
||||
version.ljust(14),
|
||||
ANSI[color],
|
||||
note,
|
||||
note.ljust(8),
|
||||
ANSI['reset'],
|
||||
path.ljust(76),
|
||||
))
|
||||
|
|
|
@ -29,7 +29,6 @@ from .util import enforce_types # type: ignore
|
|||
from .system import get_dir_size, dedupe_cron_jobs, CRON_COMMENT
|
||||
from .index import (
|
||||
load_main_index,
|
||||
get_empty_snapshot_queryset,
|
||||
parse_links_from_source,
|
||||
dedupe_links,
|
||||
write_main_index,
|
||||
|
@ -45,16 +44,22 @@ from .index import (
|
|||
get_corrupted_folders,
|
||||
get_unrecognized_folders,
|
||||
fix_invalid_folder_locations,
|
||||
write_link_details,
|
||||
)
|
||||
from .index.json import (
|
||||
parse_json_main_index,
|
||||
parse_json_links_details,
|
||||
generate_json_index_from_links,
|
||||
)
|
||||
from .index.sql import (
|
||||
get_admins,
|
||||
apply_migrations,
|
||||
remove_from_sql_main_index,
|
||||
)
|
||||
from .index.html import (
|
||||
generate_index_from_links,
|
||||
)
|
||||
from .index.csv import links_to_csv
|
||||
from .extractors import archive_links, archive_link, ignore_methods
|
||||
from .config import (
|
||||
stderr,
|
||||
|
@ -83,7 +88,6 @@ from .config import (
|
|||
check_dependencies,
|
||||
check_data_folder,
|
||||
write_config_file,
|
||||
setup_django,
|
||||
VERSION,
|
||||
CODE_LOCATIONS,
|
||||
EXTERNAL_LOCATIONS,
|
||||
|
@ -110,6 +114,7 @@ from .logging_util import (
|
|||
printable_dependency_version,
|
||||
)
|
||||
|
||||
from .search import flush_search_index, index_links
|
||||
|
||||
ALLOWED_IN_OUTPUT_DIR = {
|
||||
'lost+found',
|
||||
|
@ -212,7 +217,7 @@ def version(quiet: bool=False,
|
|||
else:
|
||||
print('ArchiveBox v{}'.format(VERSION))
|
||||
p = platform.uname()
|
||||
print(p.system, platform.platform(), p.machine)
|
||||
print(sys.implementation.name.title(), p.system, platform.platform(), p.machine, '(in Docker)' if IN_DOCKER else '(not in Docker)')
|
||||
print()
|
||||
|
||||
print('{white}[i] Dependency versions:{reset}'.format(**ANSI))
|
||||
|
@ -259,6 +264,7 @@ def run(subcommand: str,
|
|||
@enforce_types
|
||||
def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
|
||||
"""Initialize a new ArchiveBox collection in the current directory"""
|
||||
from core.models import Snapshot
|
||||
Path(out_dir).mkdir(exist_ok=True)
|
||||
is_empty = not len(set(os.listdir(out_dir)) - ALLOWED_IN_OUTPUT_DIR)
|
||||
|
||||
|
@ -312,7 +318,6 @@ def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
|
|||
else:
|
||||
print('\n{green}[+] Building main SQL index and running migrations...{reset}'.format(**ANSI))
|
||||
|
||||
setup_django(out_dir, check_db=False)
|
||||
DATABASE_FILE = Path(out_dir) / SQL_INDEX_FILENAME
|
||||
print(f' √ {DATABASE_FILE}')
|
||||
print()
|
||||
|
@ -330,7 +335,7 @@ def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
|
|||
print()
|
||||
print('{green}[*] Collecting links from any existing indexes and archive folders...{reset}'.format(**ANSI))
|
||||
|
||||
all_links = get_empty_snapshot_queryset()
|
||||
all_links = Snapshot.objects.none()
|
||||
pending_links: Dict[str, Link] = {}
|
||||
|
||||
if existing_index:
|
||||
|
@ -378,7 +383,7 @@ def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
|
|||
print(' archivebox list --status=invalid')
|
||||
|
||||
|
||||
write_main_index(list(pending_links.values()), out_dir=out_dir, finished=True)
|
||||
write_main_index(list(pending_links.values()), out_dir=out_dir)
|
||||
|
||||
print('\n{green}------------------------------------------------------------------{reset}'.format(**ANSI))
|
||||
if existing_index:
|
||||
|
@ -506,7 +511,7 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
|
|||
|
||||
|
||||
@enforce_types
|
||||
def oneshot(url: str, out_dir: Path=OUTPUT_DIR):
|
||||
def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
|
||||
"""
|
||||
Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
|
||||
You can run this to archive single pages without needing to create a whole collection with archivebox init.
|
||||
|
@ -518,8 +523,9 @@ def oneshot(url: str, out_dir: Path=OUTPUT_DIR):
|
|||
color='red'
|
||||
)
|
||||
raise SystemExit(2)
|
||||
methods = ignore_methods(['title'])
|
||||
archive_link(oneshot_link[0], out_dir=out_dir, methods=methods, skip_index=True)
|
||||
|
||||
methods = extractors.split(",") if extractors else ignore_methods(['title'])
|
||||
archive_link(oneshot_link[0], out_dir=out_dir, methods=methods)
|
||||
return oneshot_link
|
||||
|
||||
@enforce_types
|
||||
|
@ -529,8 +535,8 @@ def add(urls: Union[str, List[str]],
|
|||
index_only: bool=False,
|
||||
overwrite: bool=False,
|
||||
init: bool=False,
|
||||
out_dir: Path=OUTPUT_DIR,
|
||||
extractors: str="") -> List[Link]:
|
||||
extractors: str="",
|
||||
out_dir: Path=OUTPUT_DIR) -> List[Link]:
|
||||
"""Add a new URL or list of URLs to your archive"""
|
||||
|
||||
assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
|
||||
|
@ -567,7 +573,7 @@ def add(urls: Union[str, List[str]],
|
|||
imported_links = list({link.url: link for link in (new_links + new_links_depth)}.values())
|
||||
new_links = dedupe_links(all_links, imported_links)
|
||||
|
||||
write_main_index(links=new_links, out_dir=out_dir, finished=not new_links)
|
||||
write_main_index(links=new_links, out_dir=out_dir)
|
||||
all_links = load_main_index(out_dir=out_dir)
|
||||
|
||||
if index_only:
|
||||
|
@ -585,7 +591,7 @@ def add(urls: Union[str, List[str]],
|
|||
archive_links(imported_links, overwrite=True, **archive_kwargs)
|
||||
elif new_links:
|
||||
archive_links(new_links, overwrite=False, **archive_kwargs)
|
||||
|
||||
|
||||
return all_links
|
||||
|
||||
@enforce_types
|
||||
|
@ -660,6 +666,7 @@ def remove(filter_str: Optional[str]=None,
|
|||
|
||||
to_remove = snapshots.count()
|
||||
|
||||
flush_search_index(snapshots=snapshots)
|
||||
remove_from_sql_main_index(snapshots=snapshots, out_dir=out_dir)
|
||||
all_snapshots = load_main_index(out_dir=out_dir)
|
||||
log_removal_finished(all_snapshots.count(), to_remove)
|
||||
|
@ -677,6 +684,7 @@ def update(resume: Optional[float]=None,
|
|||
status: Optional[str]=None,
|
||||
after: Optional[str]=None,
|
||||
before: Optional[str]=None,
|
||||
extractors: str="",
|
||||
out_dir: Path=OUTPUT_DIR) -> List[Link]:
|
||||
"""Import any new links from subscriptions and retry any previously failed/skipped links"""
|
||||
|
||||
|
@ -684,6 +692,8 @@ def update(resume: Optional[float]=None,
|
|||
check_dependencies()
|
||||
new_links: List[Link] = [] # TODO: Remove input argument: only_new
|
||||
|
||||
extractors = extractors.split(",") if extractors else []
|
||||
|
||||
# Step 1: Filter for selected_links
|
||||
matching_snapshots = list_links(
|
||||
filter_patterns=filter_patterns,
|
||||
|
@ -700,6 +710,9 @@ def update(resume: Optional[float]=None,
|
|||
all_links = [link for link in matching_folders.values() if link]
|
||||
|
||||
if index_only:
|
||||
for link in all_links:
|
||||
write_link_details(link, out_dir=out_dir, skip_sql_index=True)
|
||||
index_links(all_links, out_dir=out_dir)
|
||||
return all_links
|
||||
|
||||
# Step 2: Run the archive methods for each link
|
||||
|
@ -714,7 +727,13 @@ def update(resume: Optional[float]=None,
|
|||
stderr(f'[√] Nothing found to resume after {resume}', color='green')
|
||||
return all_links
|
||||
|
||||
archive_links(to_archive, overwrite=overwrite, out_dir=out_dir)
|
||||
archive_kwargs = {
|
||||
"out_dir": out_dir,
|
||||
}
|
||||
if extractors:
|
||||
archive_kwargs["methods"] = extractors
|
||||
|
||||
archive_links(to_archive, overwrite=overwrite, **archive_kwargs)
|
||||
|
||||
# Step 4: Re-write links index with updated titles, icons, and resources
|
||||
all_links = load_main_index(out_dir=out_dir)
|
||||
|
@ -747,7 +766,6 @@ def list_all(filter_patterns_str: Optional[str]=None,
|
|||
elif filter_patterns_str:
|
||||
filter_patterns = filter_patterns_str.split('\n')
|
||||
|
||||
|
||||
snapshots = list_links(
|
||||
filter_patterns=filter_patterns,
|
||||
filter_type=filter_type,
|
||||
|
@ -763,8 +781,16 @@ def list_all(filter_patterns_str: Optional[str]=None,
|
|||
status=status,
|
||||
out_dir=out_dir,
|
||||
)
|
||||
|
||||
print(printable_folders(folders, json=json, csv=csv, html=html, with_headers=with_headers))
|
||||
|
||||
if json:
|
||||
output = generate_json_index_from_links(folders.values(), with_headers)
|
||||
elif html:
|
||||
output = generate_index_from_links(folders.values(), with_headers)
|
||||
elif csv:
|
||||
output = links_to_csv(folders.values(), cols=csv.split(','), header=with_headers)
|
||||
else:
|
||||
output = printable_folders(folders, with_headers=with_headers)
|
||||
print(output)
|
||||
return folders
|
||||
|
||||
|
||||
|
@ -1048,7 +1074,6 @@ def server(runserver_args: Optional[List[str]]=None,
|
|||
config.DEBUG = config.DEBUG or debug
|
||||
|
||||
check_data_folder(out_dir=out_dir)
|
||||
setup_django(out_dir)
|
||||
|
||||
from django.core.management import call_command
|
||||
from django.contrib.auth.models import User
|
||||
|
@ -1085,7 +1110,6 @@ def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None:
|
|||
"""Run an ArchiveBox Django management command"""
|
||||
|
||||
check_data_folder(out_dir=out_dir)
|
||||
setup_django(out_dir)
|
||||
from django.core.management import execute_from_command_line
|
||||
|
||||
if (args and "createsuperuser" in args) and (IN_DOCKER and not IS_TTY):
|
||||
|
@ -1102,7 +1126,6 @@ def shell(out_dir: Path=OUTPUT_DIR) -> None:
|
|||
|
||||
check_data_folder(out_dir=out_dir)
|
||||
|
||||
setup_django(OUTPUT_DIR)
|
||||
from django.core.management import call_command
|
||||
call_command("shell_plus")
|
||||
|
||||
|
|
|
@ -8,10 +8,9 @@ if __name__ == '__main__':
|
|||
# (e.g. makemigrations), you can comment out this check temporarily
|
||||
|
||||
if not ('makemigrations' in sys.argv or 'migrate' in sys.argv):
|
||||
print("[X] Don't run ./manage.py directly, use the archivebox CLI instead e.g.:")
|
||||
print(' archivebox manage createsuperuser')
|
||||
print("[X] Don't run ./manage.py directly (unless you are a developer running makemigrations):")
|
||||
print()
|
||||
print(' Hint: Use these archivebox commands instead of the ./manage.py equivalents:')
|
||||
print(' Hint: Use these archivebox CLI commands instead of the ./manage.py equivalents:')
|
||||
print(' archivebox init (migrates the databse to latest version)')
|
||||
print(' archivebox server (runs the Django web server)')
|
||||
print(' archivebox shell (opens an iPython Django shell with all models imported)')
|
||||
|
|
|
@ -32,6 +32,7 @@ from ..index.schema import Link
|
|||
from ..logging_util import TimedProgress, log_source_saved
|
||||
|
||||
from .pocket_html import parse_pocket_html_export
|
||||
from .pocket_api import parse_pocket_api_export
|
||||
from .pinboard_rss import parse_pinboard_rss_export
|
||||
from .wallabag_atom import parse_wallabag_atom_export
|
||||
from .shaarli_rss import parse_shaarli_rss_export
|
||||
|
@ -44,6 +45,7 @@ from .generic_txt import parse_generic_txt_export
|
|||
|
||||
PARSERS = (
|
||||
# Specialized parsers
|
||||
('Pocket API', parse_pocket_api_export),
|
||||
('Wallabag ATOM', parse_wallabag_atom_export),
|
||||
('Pocket HTML', parse_pocket_html_export),
|
||||
('Pinboard RSS', parse_pinboard_rss_export),
|
||||
|
|
113
archivebox/parsers/pocket_api.py
Normal file
|
@ -0,0 +1,113 @@
|
|||
__package__ = 'archivebox.parsers'
|
||||
|
||||
|
||||
import re
|
||||
|
||||
from typing import IO, Iterable, Optional
|
||||
from configparser import ConfigParser
|
||||
|
||||
from pathlib import Path
|
||||
from ..vendor.pocket import Pocket
|
||||
|
||||
from ..index.schema import Link
|
||||
from ..util import enforce_types
|
||||
from ..system import atomic_write
|
||||
from ..config import (
|
||||
SOURCES_DIR,
|
||||
POCKET_CONSUMER_KEY,
|
||||
POCKET_ACCESS_TOKENS,
|
||||
)
|
||||
|
||||
|
||||
# Number of articles requested per Pocket API call; also used as the page
# stride when computing the `offset` argument in get_pocket_articles().
COUNT_PER_PAGE = 500
|
||||
# ConfigParser-format file under SOURCES_DIR holding one section per username
# with the last `since` value (see write_since() / read_since()).
API_DB_PATH = Path(SOURCES_DIR) / 'pocket_api.db'
|
||||
|
||||
# search for broken protocols that sometimes come from the Pocket API
# (matches a leading "http:/" or "https:/" that is NOT followed by a second slash)
|
||||
_BROKEN_PROTOCOL_RE = re.compile('^(http[s]?)(:/(?!/))')
|
||||
|
||||
|
||||
def get_pocket_articles(api: Pocket, since=None, page=0):
    """Yield every archived Pocket article, fetching one page per API call.

    Pages of COUNT_PER_PAGE items are requested oldest-first, starting at
    ``page``. A page that returns fewer than COUNT_PER_PAGE items is treated
    as the last one; once its items have been yielded, ``api.last_since`` is
    set to the ``since`` value of that final response so the caller can
    persist it.

    Args:
        api: Pocket client. Only ``api.get(...)`` is called, and the
            ``last_since`` attribute is assigned on it.
        since: forwarded unchanged to ``api.get``.
        page: zero-based index of the first page to request.

    Yields:
        The items of ``body['list']`` (the dict's values when it is a dict).
    """
    # Loop instead of recursing once per page: recursion stacked one nested
    # generator per page, so every item was relayed through all outer
    # generators and very long histories could hit the recursion limit.
    while True:
        body, _headers = api.get(
            state='archive',
            sort='oldest',
            since=since,
            count=COUNT_PER_PAGE,
            offset=page * COUNT_PER_PAGE,
        )

        # 'list' is either a dict (use its values) or already a sized container
        articles = body['list'].values() if isinstance(body['list'], dict) else body['list']
        returned_count = len(articles)

        yield from articles

        if returned_count != COUNT_PER_PAGE:
            # short page => nothing more to fetch; remember where we stopped
            api.last_since = body['since']
            return

        page += 1
|
||||
|
||||
|
||||
def link_from_article(article: dict, sources: list):
    """Build a Link from one Pocket API article dict.

    Args:
        article: must contain 'resolved_url', 'given_url', 'resolved_title',
            'given_title' and 'time_read'; 'tags' is optional.
        sources: passed through unchanged as the Link's ``sources``.

    Returns:
        A Link whose url is the resolved URL (falling back to the given URL),
        whose title falls back to the given title and then to the URL, and
        whose timestamp is the article's 'time_read'.
    """
    url: str = article['resolved_url'] or article['given_url']

    # Repair a leading "http:/" or "https:/" that is missing its second slash.
    # The pattern is anchored at the start, so only the scheme is touched.
    # (str.replace used to rewrite every "http:/" in the URL, turning a valid
    # "http://" inside a query string into "http:///".)
    url = _BROKEN_PROTOCOL_RE.sub(r'\1://', url, count=1)

    title = article['resolved_title'] or article['given_title'] or url

    return Link(
        url=url,
        timestamp=article['time_read'],
        title=title,
        tags=article.get('tags'),
        sources=sources
    )
|
||||
|
||||
|
||||
def write_since(username: str, since: str):
    """Store ``since`` under the ``[username]`` section of API_DB_PATH.

    The file is created empty first if it does not exist, then parsed,
    the user's section is replaced with ``{'since': since}``, and the whole
    parser state is written back over the file.
    """
    db_missing = not API_DB_PATH.exists()
    if db_missing:
        atomic_write(API_DB_PATH, '')

    state = ConfigParser()
    state.optionxform = str  # keep option names as written instead of lower-casing them
    state.read(API_DB_PATH)

    state[username] = {'since': since}

    with open(API_DB_PATH, 'w+') as db_file:
        state.write(db_file)
|
||||
|
||||
|
||||
def read_since(username: str) -> Optional[str]:
    """Return the stored ``since`` value for ``username``, or None.

    API_DB_PATH is created empty first if it does not exist. None is returned
    when the file has no ``[username]`` section or no ``since`` option in it.
    """
    db_missing = not API_DB_PATH.exists()
    if db_missing:
        atomic_write(API_DB_PATH, '')

    state = ConfigParser()
    state.optionxform = str  # keep option names as written instead of lower-casing them
    state.read(API_DB_PATH)

    stored_since = state.get(username, 'since', fallback=None)
    return stored_since
|
||||
|
||||
|
||||
@enforce_types
def should_parse_as_pocket_api(text: str) -> bool:
    """Return True when ``text`` begins with the ``pocket://`` pseudo-URL scheme."""
    pocket_scheme = 'pocket://'
    return text.startswith(pocket_scheme)
|
||||
|
||||
|
||||
@enforce_types
def parse_pocket_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
    """Parse bookmarks from the Pocket API

    Every line of ``input_buffer`` of the form ``pocket://<username>`` is
    treated as a request to sync that user's archived articles. For each
    such line the articles newer than the stored ``since`` marker are yielded
    as Links (with the raw line as their source), and the marker is updated
    once that user's articles have all been yielded.

    Raises:
        KeyError: if POCKET_ACCESS_TOKENS has no entry for the username.
    """

    input_buffer.seek(0)
    pattern = re.compile(r"^pocket:\/\/(\w+)")
    for line in input_buffer:
        if not should_parse_as_pocket_api(line):
            continue

        match = pattern.search(line)
        if match is None:
            # "pocket://" with nothing usable after it: there is no username
            # to look up, so skip the line like any other non-pocket line
            # instead of failing with AttributeError on `None.group`.
            continue

        username = match.group(1)
        api = Pocket(POCKET_CONSUMER_KEY, POCKET_ACCESS_TOKENS[username])
        api.last_since = None  # filled in by get_pocket_articles() on the last page

        for article in get_pocket_articles(api, since=read_since(username)):
            yield link_from_article(article, sources=[line])

        write_since(username, api.last_since)
|
108
archivebox/search/__init__.py
Normal file
|
@ -0,0 +1,108 @@
|
|||
from typing import List, Union
|
||||
from pathlib import Path
|
||||
from importlib import import_module
|
||||
|
||||
from django.db.models import QuerySet
|
||||
|
||||
from archivebox.index.schema import Link
|
||||
from archivebox.util import enforce_types
|
||||
from archivebox.config import stderr, OUTPUT_DIR, USE_INDEXING_BACKEND, USE_SEARCHING_BACKEND, SEARCH_BACKEND_ENGINE
|
||||
|
||||
from .utils import get_indexable_content, log_index_started
|
||||
|
||||
def indexing_enabled():
    """Return the USE_INDEXING_BACKEND config value (whether to write to the search index)."""
    enabled = USE_INDEXING_BACKEND
    return enabled
|
||||
|
||||
def search_backend_enabled():
    """Return the USE_SEARCHING_BACKEND config value (whether to query the search backend)."""
    enabled = USE_SEARCHING_BACKEND
    return enabled
|
||||
|
||||
def get_backend():
    """Return the dotted module path of the configured search backend."""
    return 'search.backends.{}'.format(SEARCH_BACKEND_ENGINE)
|
||||
|
||||
def import_backend():
    """Import and return the module named by get_backend().

    Raises:
        Exception: if the module cannot be imported for any reason. The
            original error is attached as ``__cause__``.
    """
    backend_string = get_backend()
    try:
        backend = import_module(backend_string)
    except Exception as err:
        # chain explicitly so the traceback shows the import failure as the cause
        raise Exception("Could not load '%s' as a backend: %s" % (backend_string, err)) from err
    return backend
|
||||
|
||||
@enforce_types
def write_search_index(link: Link, texts: Union[List[str], None]=None, out_dir: Path=OUTPUT_DIR, skip_text_index: bool=False) -> None:
    """Send ``texts`` for ``link`` to the configured search backend.

    Does nothing when indexing is disabled, when ``skip_text_index`` is set,
    or when ``texts`` is empty/None. Otherwise the first Snapshot whose url
    equals ``link.url`` is looked up and, if one exists, ``backend.index`` is
    called with its id. Errors raised by ``backend.index`` are reported on
    stderr and swallowed. ``out_dir`` is accepted but not used here.
    """
    if not indexing_enabled():
        return

    if skip_text_index or not texts:
        return

    from core.models import Snapshot

    snap = Snapshot.objects.filter(url=link.url).first()
    backend = import_backend()
    if not snap:
        return

    try:
        backend.index(snapshot_id=str(snap.id), texts=texts)
    except Exception as err:
        stderr()
        stderr(
            f'[X] The search backend threw an exception={err}:',
            color='red',
        )
|
||||
|
||||
@enforce_types
def query_search_index(query: str, out_dir: Path=OUTPUT_DIR) -> QuerySet:
    """Return the Snapshots that the search backend reports for ``query``.

    When the search backend is disabled an empty QuerySet is returned.
    Errors raised by ``backend.search`` are reported on stderr and then
    re-raised. ``out_dir`` is accepted but not used here.
    """
    from core.models import Snapshot

    if not search_backend_enabled():
        return Snapshot.objects.none()

    backend = import_backend()
    try:
        snapshot_ids = backend.search(query)
    except Exception as err:
        stderr()
        stderr(
            f'[X] The search backend threw an exception={err}:',
            color='red',
        )
        raise

    # TODO preserve ordering from backend
    return Snapshot.objects.filter(pk__in=snapshot_ids)
|
||||
|
||||
@enforce_types
def flush_search_index(snapshots: QuerySet):
    """Remove the given snapshots from the search backend's index.

    Does nothing when indexing is disabled or ``snapshots`` matches no rows.
    Errors raised by ``backend.flush`` are reported on stderr and swallowed.

    Args:
        snapshots: QuerySet of Snapshot rows whose primary keys are flushed.
    """
    # .exists() asks the database for a single row; `not snapshots` would
    # fetch and instantiate every matching row just to test for emptiness.
    if not indexing_enabled() or not snapshots.exists():
        return
    backend = import_backend()
    # lazy generator: the primary keys are only read once the backend iterates it
    snapshot_ids = (str(pk) for pk in snapshots.values_list('pk', flat=True))
    try:
        backend.flush(snapshot_ids)
    except Exception as err:
        stderr()
        stderr(
            f'[X] The search backend threw an exception={err}:',
            color='red',
        )
|
||||
|
||||
@enforce_types
def index_links(links: Union[List[Link],None], out_dir: Path=OUTPUT_DIR):
    """(Re)index the text content of every link that has a Snapshot row.

    For each link, the first Snapshot with the same url is looked up; links
    without one are skipped. The snapshot's indexable ArchiveResults are read
    via get_indexable_content() and handed to write_search_index(). A failure
    while reading content is reported on stderr and that link is skipped.
    """
    if not links:
        return

    from core.models import Snapshot, ArchiveResult

    for link in links:
        snap = Snapshot.objects.filter(url=link.url).first()
        if not snap:
            continue

        results = ArchiveResult.objects.indexable().filter(snapshot=snap)
        log_index_started(link.url)

        try:
            texts = get_indexable_content(results)
        except Exception as err:
            stderr()
            stderr(
                f'[X] An Exception ocurred reading the indexable content={err}:',
                color='red',
            )
            continue

        # deliberately outside the try: errors from the index write are not caught here
        write_search_index(link, texts, out_dir=out_dir)
|
0
archivebox/search/backends/__init__.py
Normal file
45
archivebox/search/backends/ripgrep.py
Normal file
|
@ -0,0 +1,45 @@
|
|||
import re
|
||||
from subprocess import run, PIPE
|
||||
from typing import List, Generator
|
||||
|
||||
from archivebox.config import ARCHIVE_DIR, RIPGREP_VERSION
|
||||
from archivebox.util import enforce_types
|
||||
|
||||
# File extensions grouped into a custom ripgrep file type named "ignore".
RG_IGNORE_EXTENSIONS = ('css','js','orig','svg')
|
||||
|
||||
RG_ADD_TYPE = '--type-add'
|
||||
# Evaluates to "ignore:*.{css,js,orig,svg}" -- the value passed after --type-add.
RG_IGNORE_ARGUMENTS = f"ignore:*.{{{','.join(RG_IGNORE_EXTENSIONS)}}}"
|
||||
# The trailing "Tignore" is presumably rg's -T (type-not) applied to the
# "ignore" type defined above -- confirm against the ripgrep manual.
RG_DEFAULT_ARGUMENTS = "-ilTignore" # Case insensitive(i), matching files results(l)
|
||||
RG_REGEX_ARGUMENT = '-e'
|
||||
|
||||
# Captures a "<digits>.<digits>" path segment enclosed in slashes; search()
# uses the first such segment of each matching file path as a snapshot timestamp.
TIMESTAMP_REGEX = r'\/([\d]+\.[\d]+)\/'
|
||||
|
||||
ts_regex = re.compile(TIMESTAMP_REGEX)
|
||||
|
||||
@enforce_types
def index(snapshot_id: str, texts: List[str]):
    """No-op: this backend's index() does nothing with the texts it is given."""
    return None
|
||||
|
||||
@enforce_types
def flush(snapshot_ids: Generator[str, None, None]):
    """No-op: this backend's flush() does nothing with the ids it is given."""
    return None
|
||||
|
||||
@enforce_types
def search(text: str) -> List[str]:
    """Return the ids of snapshots whose archived files match ``text``.

    Runs ``rg`` over ARCHIVE_DIR with ``text`` as the ``-e`` pattern, pulls
    the first "<digits>.<digits>" path segment out of every reported file
    path, and returns the primary keys (as strings) of the Snapshots whose
    timestamp is one of those segments.

    Raises:
        Exception: if RIPGREP_VERSION is falsy (binary not detected).
    """
    if not RIPGREP_VERSION:
        raise Exception("ripgrep binary not found, install ripgrep to use this search backend")

    from core.models import Snapshot

    rg_cmd = ['rg', RG_ADD_TYPE, RG_IGNORE_ARGUMENTS, RG_DEFAULT_ARGUMENTS, RG_REGEX_ARGUMENT, text, str(ARCHIVE_DIR)]
    completed = run(rg_cmd, stdout=PIPE, stderr=PIPE, timeout=60)

    # rg prints one matching file path per line (bytes, because no text mode was requested)
    matched_paths = [raw.decode() for raw in completed.stdout.splitlines()]

    timestamps = set()
    for matched_path in matched_paths:
        found = ts_regex.search(matched_path)
        if found:
            timestamps.add(found.group(1))

    matching_pks = Snapshot.objects.filter(timestamp__in=timestamps).values_list('pk', flat=True)
    return [str(pk) for pk in matching_pks]
|
28
archivebox/search/backends/sonic.py
Normal file
|
@ -0,0 +1,28 @@
|
|||
from typing import List, Generator
|
||||
|
||||
from sonic import IngestClient, SearchClient
|
||||
|
||||
from archivebox.util import enforce_types
|
||||
from archivebox.config import SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD, SONIC_BUCKET, SONIC_COLLECTION
|
||||
|
||||
# Maximum number of characters per chunk that index() pushes to sonic in one call.
MAX_SONIC_TEXT_LENGTH = 20000
|
||||
|
||||
@enforce_types
def index(snapshot_id: str, texts: List[str]):
    """Push every text to sonic under ``snapshot_id``.

    Each text is split into consecutive slices of at most
    MAX_SONIC_TEXT_LENGTH characters and each slice is pushed separately.
    """
    with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
        for text in texts:
            for start in range(0, len(text), MAX_SONIC_TEXT_LENGTH):
                piece = text[start:start + MAX_SONIC_TEXT_LENGTH]
                ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(piece))
|
||||
|
||||
@enforce_types
def search(text: str) -> List[str]:
    """Query sonic for ``text`` and return whatever ``SearchClient.query`` yields for it."""
    with SearchClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as querycl:
        return querycl.query(SONIC_COLLECTION, SONIC_BUCKET, text)
|
||||
|
||||
@enforce_types
def flush(snapshot_ids: Generator[str, None, None]):
    """Call ``flush_object`` on the sonic ingest client once per id in ``snapshot_ids``."""
    with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
        for snapshot_id in snapshot_ids:
            ingestcl.flush_object(SONIC_COLLECTION, SONIC_BUCKET, str(snapshot_id))
|
44
archivebox/search/utils.py
Normal file
|
@ -0,0 +1,44 @@
|
|||
from django.db.models import QuerySet
|
||||
|
||||
from archivebox.util import enforce_types
|
||||
from archivebox.config import ANSI
|
||||
|
||||
def log_index_started(url):
    """Print a green "[*] Indexing url" notice for ``url`` followed by an empty line."""
    message = '{green}[*] Indexing url: {} in the search index {reset}'.format(url, **ANSI)
    print(message)
    print()
|
||||
|
||||
def get_file_result_content(res, extra_path, use_pwd=False):
    """Read the text file produced by an archive result.

    Args:
        res: object with an ``output`` attribute and, when ``use_pwd`` is
            true, a ``pwd`` attribute.
        extra_path: path appended below the output path, or a falsy value to
            read the output path itself.
        use_pwd: when true the path is ``<res.pwd>/<res.output>``; otherwise
            ``res.output`` is used as given.
            NOTE(review): without use_pwd a relative ``res.output`` is
            resolved against the current working directory -- confirm that
            callers passing use_pwd=False expect that.

    Returns:
        A one-element list holding the file's text, or an empty list when the
        file is empty.

    Raises:
        OSError: if the file cannot be opened.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    if use_pwd:
        fpath = f'{res.pwd}/{res.output}'
    else:
        fpath = f'{res.output}'

    if extra_path:
        fpath = f'{fpath}/{extra_path}'

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(fpath, 'r', encoding='utf-8') as file:
        data = file.read()
    if data:
        return [data]
    return []
|
||||
|
||||
|
||||
# This should be abstracted by a plugin interface for extractors
|
||||
@enforce_types
def get_indexable_content(results: QuerySet):
    """Return the indexable text of the first archive result in ``results``.

    Only the first result is considered. Its ``extractor`` decides where the
    text is read from:

    * 'readability': ``<output>/content.txt``
    * 'singlefile':  ``<output>``
    * 'dom', 'wget': ``<pwd>/<output>``

    Returns:
        The list produced by get_file_result_content(), or an empty list when
        there are no results or the first result uses any other extractor.
    """
    # Only use the first method available.
    # A single first() call replaces the old emptiness test plus two first()
    # calls, which each hit the database separately.
    res = results.first()
    if res is None:
        return []

    method = res.extractor

    # This should come from a plugin interface
    if method == 'readability':
        return get_file_result_content(res, 'content.txt')
    if method == 'singlefile':
        return get_file_result_content(res, '')
    if method in ('dom', 'wget'):
        return get_file_result_content(res, '', use_pwd=True)
    return []
|
|
@ -107,6 +107,9 @@
|
|||
<a href="{% url 'admin:password_change' %}">{% trans 'Change password' %}</a> /
|
||||
{% endif %}
|
||||
<a href="{% url 'admin:logout' %}">{% trans 'Log out' %}</a>
|
||||
|
|
||||
<a> <span id="snapshotListView" style="cursor: pointer">☰</span> </a>
|
||||
<a> <span id="snapshotGridView"style="letter-spacing: -.4em; cursor: pointer;">⣿⣿</span></a>
|
||||
{% endblock %}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
@ -179,8 +182,63 @@
|
|||
});
|
||||
}
|
||||
};
|
||||
|
||||
// Navigate to `uri`, carrying over the current page's query string.
function redirectWithQuery(uri){
    // declared locally: the bare assignment used to create an implicit global
    const uriWithQuery = uri + document.location.search;
    window.location = uriWithQuery;
};
|
||||
|
||||
// Remember "List" as the preferred snapshot view and open the list page.
function selectSnapshotListView(){
    const listViewUrl = "{% url 'admin:core_snapshot_changelist' %}";
    localStorage.setItem('currentSnapshotView', 'List');
    redirectWithQuery(listViewUrl);
};
|
||||
|
||||
// Remember "Grid" as the preferred snapshot view and open the grid page.
function selectSnapshotGridView(){
    const gridViewUrl = "{% url 'admin:grid' %}";
    localStorage.setItem('currentSnapshotView', 'Grid');
    redirectWithQuery(gridViewUrl);
};
|
||||
|
||||
function setPreferredSnapshotView(view){
|
||||
urlPath = window.location.pathname;
|
||||
|
||||
if((view==="Grid") && urlPath == "{% url 'admin:core_snapshot_changelist' %}"){
|
||||
selectSnapshotGridView();
|
||||
}
|
||||
|
||||
{% comment %}
|
||||
else if((view==="List") && urlPath == "{% url 'admin:grid' %}"){
|
||||
selectSnapshotListView();
|
||||
|
||||
}
|
||||
{% endcomment %}
|
||||
};
|
||||
|
||||
// Apply the stored view preference, then wire up the list/grid toggle icons
// and the per-card selection highlight once the DOM is ready.
function setupSnapshotViews() {
    setPreferredSnapshotView(localStorage.getItem('currentSnapshotView'));

    $( document ).ready(function() {

        $("#snapshotListView").on('click', function() {
            selectSnapshotListView();
        });
        $("#snapshotGridView").on('click', function() {
            selectSnapshotGridView();
        });

        // highlight the element four levels above a checkbox while it is checked
        $('input:checkbox').on('change', function(){
            const card = $(this).parent().parent().parent().parent();
            card.toggleClass('selected-card', $(this).is(':checked'));
        });

    });
};
|
||||
$(function () {
|
||||
fix_actions();
|
||||
setupSnapshotViews();
|
||||
});
|
||||
})(django.jQuery);
|
||||
</script>
|
||||
|
|
91
archivebox/themes/admin/grid_change_list.html
Normal file
|
@ -0,0 +1,91 @@
|
|||
{% extends "admin/base_site.html" %}
|
||||
{% load i18n admin_urls static admin_list %}
|
||||
{% load core_tags %}
|
||||
|
||||
{% block extrastyle %}
|
||||
{{ block.super }}
|
||||
<link rel="stylesheet" type="text/css" href="{% static "admin/css/changelists.css" %}">
|
||||
{% if cl.formset %}
|
||||
<link rel="stylesheet" type="text/css" href="{% static "admin/css/forms.css" %}">
|
||||
{% endif %}
|
||||
{% if cl.formset or action_form %}
|
||||
<script src="{% url 'admin:jsi18n' %}"></script>
|
||||
{% endif %}
|
||||
{{ media.css }}
|
||||
{% if not actions_on_top and not actions_on_bottom %}
|
||||
<style>
|
||||
#changelist table thead th:first-child {width: inherit}
|
||||
</style>
|
||||
{% endif %}
|
||||
{% endblock %}
|
||||
|
||||
{% block extrahead %}
|
||||
{{ block.super }}
|
||||
{{ media.js }}
|
||||
{% endblock %}
|
||||
|
||||
{% block bodyclass %}{{ block.super }} app-{{ opts.app_label }} model-{{ opts.model_name }} change-list{% endblock %}
|
||||
|
||||
{% if not is_popup %}
|
||||
{% block breadcrumbs %}
|
||||
<div class="breadcrumbs">
|
||||
<a href="{% url 'admin:index' %}">{% translate 'Home' %}</a>
|
||||
› <a href="{% url 'admin:app_list' app_label=cl.opts.app_label %}">{{ cl.opts.app_config.verbose_name }}</a>
|
||||
› {{ cl.opts.verbose_name_plural|capfirst }}
|
||||
</div>
|
||||
{% endblock %}
|
||||
{% endif %}
|
||||
|
||||
{% block coltype %}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div id="content-main">
|
||||
{% block object-tools %}
|
||||
<ul class="object-tools">
|
||||
{% block object-tools-items %}
|
||||
{% change_list_object_tools %}
|
||||
{% endblock %}
|
||||
</ul>
|
||||
{% endblock %}
|
||||
{% if cl.formset and cl.formset.errors %}
|
||||
<p class="errornote">
|
||||
{% if cl.formset.total_error_count == 1 %}{% translate "Please correct the error below." %}{% else %}{% translate "Please correct the errors below." %}{% endif %}
|
||||
</p>
|
||||
{{ cl.formset.non_form_errors }}
|
||||
{% endif %}
|
||||
<div class="module{% if cl.has_filters %} filtered{% endif %}" id="changelist">
|
||||
<div class="changelist-form-container">
|
||||
{% block search %}{% search_form cl %}{% endblock %}
|
||||
{% block date_hierarchy %}{% if cl.date_hierarchy %}{% date_hierarchy cl %}{% endif %}{% endblock %}
|
||||
|
||||
<form id="changelist-form" method="post"{% if cl.formset and cl.formset.is_multipart %} enctype="multipart/form-data"{% endif %} novalidate>{% csrf_token %}
|
||||
{% if cl.formset %}
|
||||
<div>{{ cl.formset.management_form }}</div>
|
||||
{% endif %}
|
||||
|
||||
{% block result_list %}
|
||||
{% if action_form and actions_on_top and cl.show_admin_actions %}{% admin_actions %}{% endif %}
|
||||
{% comment %}
|
||||
Table grid
|
||||
{% result_list cl %}
|
||||
{% endcomment %}
|
||||
{% snapshots_grid cl %}
|
||||
{% if action_form and actions_on_bottom and cl.show_admin_actions %}{% admin_actions %}{% endif %}
|
||||
{% endblock %}
|
||||
{% block pagination %}{% pagination cl %}{% endblock %}
|
||||
</form>
|
||||
</div>
|
||||
{% block filters %}
|
||||
{% if cl.has_filters %}
|
||||
<div id="changelist-filter">
|
||||
<h2>{% translate 'Filter' %}</h2>
|
||||
{% if cl.has_active_filters %}<h3 id="changelist-filter-clear">
|
||||
<a href="{{ cl.clear_all_filters_qs }}">✖ {% translate "Clear all filters" %}</a>
|
||||
</h3>{% endif %}
|
||||
{% for spec in cl.filter_specs %}{% admin_list_filter cl spec %}{% endfor %}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% endblock %}
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
162
archivebox/themes/admin/snapshots_grid.html
Normal file
|
@ -0,0 +1,162 @@
|
|||
{% load i18n admin_urls static admin_list %}
|
||||
{% load core_tags %}
|
||||
|
||||
{% block extrastyle %}
|
||||
<style>
|
||||
* {
|
||||
-webkit-box-sizing: border-box;
|
||||
-moz-box-sizing: border-box;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
a {
|
||||
text-decoration: none;
|
||||
color: orange;
|
||||
}
|
||||
h2 {
|
||||
color: #000;
|
||||
margin: 2rem 0 .5rem;
|
||||
font-size: 1.25rem;
|
||||
font-weight: 400;
|
||||
{% comment %} text-transform: uppercase; {% endcomment %}
|
||||
}
|
||||
|
||||
/* Base styling for images inside a card. `card.img` would only match a
   <card class="img"> element, which this template never renders. The
   `.thumbnail img` rule further down overrides the height for thumbnails. */
.card img {
    display: block;
    border: 0;
    width: 100%;
    height: auto;
}
|
||||
|
||||
/*************************** Cards *******************************/
|
||||
|
||||
.cards {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); /* see notes below */
|
||||
grid-auto-rows: minmax(200px, auto);
|
||||
grid-gap: 1rem;
|
||||
}
|
||||
|
||||
.card {
|
||||
/*height: 200px;*/
|
||||
/*background: red;*/
|
||||
border: 2px solid #e7e7e7;
|
||||
border-radius: 4px;
|
||||
-webkit-box-shadow: 0 2px 2px rgba(0, 0, 0, 0.15);
|
||||
box-shadow: 0 2px 2px rgba(0, 0, 0, 0.15);
|
||||
display: flex;
|
||||
/* -webkit-box-orient: vertical; */
|
||||
/* -webkit-box-direction: normal; */
|
||||
-ms-flex-direction: column;
|
||||
flex-direction: column;
|
||||
position: relative;
|
||||
color: #5d5e5e;
|
||||
} /* li item */
|
||||
|
||||
.thumbnail img {
|
||||
height: 100%;
|
||||
box-sizing: border-box;
|
||||
max-width: 100%;
|
||||
max-height: 100%;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.card-content {
|
||||
font-size: .75rem;
|
||||
padding: .5rem;
|
||||
display: flex;
|
||||
-webkit-box-orient: vertical;
|
||||
-webkit-box-direction: normal;
|
||||
-ms-flex-direction: column;
|
||||
flex-direction: column;
|
||||
-webkit-box-flex: 1;
|
||||
-ms-flex: 1;
|
||||
flex: 1;
|
||||
|
||||
}
|
||||
|
||||
.card-content h4{
|
||||
vertical-align:bottom;
|
||||
margin: 1.2em 0 0em 0;
|
||||
}
|
||||
|
||||
/* Tag badge positioned at the top-right of its positioned ancestor.
   Single rule: the font-size and text-transform values are the ones that
   win the cascade. */
.category {
    position: absolute;
    top: 5%;
    right: 0;
    color: #fff;
    background: #e74c3c;
    padding: 10px 15px;
    font-size: 10px;
    font-weight: 600;
    text-transform: uppercase;
}
|
||||
|
||||
.category__01 {
|
||||
background-color: #50c6db;
|
||||
|
||||
}
|
||||
|
||||
.tags{
|
||||
opacity: 0.8;
|
||||
}
|
||||
|
||||
footer {
|
||||
border-top: 2px solid #e7e7e7;
|
||||
{% comment %} margin: .5rem 0 0; {% endcomment %}
|
||||
{% comment %} min-height: 30px; {% endcomment %}
|
||||
font-size: .5rem;
|
||||
}
|
||||
.post-meta {
|
||||
padding: .3rem;
|
||||
}
|
||||
|
||||
.comments {
|
||||
margin-left: .5rem;
|
||||
}
|
||||
|
||||
.selected-card{
|
||||
border: 5px solid #ffaa31;
|
||||
}
|
||||
|
||||
</style>
|
||||
|
||||
{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<section class="cards">
|
||||
{% for obj in results %}
|
||||
<article class="card">
|
||||
<picture class="thumbnail">
|
||||
<a href="/{{obj.archive_path}}/index.html">
|
||||
<img class="category__01" src="{% snapshot_image obj%}" alt="" />
|
||||
</a>
|
||||
</picture>
|
||||
<div class="card-content">
|
||||
{% if obj.tags_str %}
|
||||
<p class="category category__01 tags">{{obj.tags_str}}</p>
|
||||
{% endif %}
|
||||
{% if obj.title %}
|
||||
<a href="{% url 'admin:core_snapshot_change' obj.id %}">
|
||||
<h4>{{obj.title|truncatechars:55 }}</h4>
|
||||
</a>
|
||||
{% endif %}
|
||||
{% comment %} <p> TEXT If needed.</p> {% endcomment %}
|
||||
</div><!-- .card-content -->
|
||||
<footer>
|
||||
<div class="post-meta">
|
||||
<span style="float:right;"><input type="checkbox" name="_selected_action" value="{{obj.pk}}" class="action-select"></span>
|
||||
<span class="timestamp">🕑 {{obj.added}}</span>
|
||||
<span class="comments">📖{{obj.num_outputs}}</span>
|
||||
<span>🗄️{{ obj.archive_size | file_size }}</span>
|
||||
</div>
|
||||
</footer>
|
||||
</article>
|
||||
{% endfor %}
|
||||
</section>
|
||||
|
||||
{% endblock %}
|
|
@ -187,13 +187,6 @@
|
|||
display: none;
|
||||
}
|
||||
|
||||
body[data-status~=finished] .files-spinner {
|
||||
display: none;
|
||||
}
|
||||
|
||||
/*body[data-status~=running] .in-progress {
|
||||
display: inline-block;
|
||||
}*/
|
||||
tr td a.favicon img {
|
||||
padding-left: 6px;
|
||||
padding-right: 12px;
|
||||
|
@ -224,12 +217,10 @@
|
|||
color: black;
|
||||
}
|
||||
|
||||
tr td a.exists-True {
|
||||
opacity: 1;
|
||||
}
|
||||
tr td a.exists-False {
|
||||
opacity: 0.1;
|
||||
filter: grayscale(100%);
|
||||
.exists-False {
|
||||
opacity: 0.1;
|
||||
filter: grayscale(100%);
|
||||
pointer-events: none;
|
||||
}
|
||||
</style>
|
||||
<link rel="stylesheet" href="{% static 'bootstrap.min.css' %}">
|
||||
|
@ -280,10 +271,9 @@
|
|||
<br />
|
||||
<center>
|
||||
<small>
|
||||
Archive created using <a href="https://github.com/ArchiveBox/ArchiveBox" title="Github">ArchiveBox</a> |
|
||||
|
||||
Download index as <a href="index.json" title="JSON summary of archived links.">JSON</a>
|
||||
<br /><br />
|
||||
Archive created using <a href="https://github.com/ArchiveBox/ArchiveBox" title="Github">ArchiveBox</a> version
|
||||
<a href="https://github.com/ArchiveBox/ArchiveBox/releases" title="Releases">v{{VERSION}}</a>.
|
||||
<br/><br/>
|
||||
{{FOOTER_INFO}}
|
||||
</small>
|
||||
</center>
|
||||
|
|
|
@ -2,44 +2,25 @@
|
|||
{% load static %}
|
||||
|
||||
{% block body %}
|
||||
<br>
|
||||
<form action="{% url 'public-index' %}" method="get">
|
||||
<input name="q" type="text" placeholder="Search...">
|
||||
<button type="submit">Search</button>
|
||||
<button onclick="location.href='{% url 'public-index' %}'" type="button">
|
||||
Reload Index</button>
|
||||
</form>
|
||||
<table id="table-bookmarks">
|
||||
<thead>
|
||||
<tr>
|
||||
<th style="width: 100px;">Bookmarked</th>
|
||||
<th style="width: 26vw;">Saved Link ({{num_links}})</th>
|
||||
<th style="width: 140px">Files</th>
|
||||
<th style="width: 16vw;white-space:nowrap;overflow-x:hidden;">Original URL</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<br>
|
||||
<form action="{% url 'public-index' %}" method="get">
|
||||
<input name="q" type="text" placeholder="Search...">
|
||||
<button type="submit">Search</button>
|
||||
<button onclick="location.href='{% url 'public-index' %}'" type="button">
|
||||
Reload Index</button>
|
||||
</form>
|
||||
<table id="table-bookmarks">
|
||||
<thead>
|
||||
<tr>
|
||||
<th style="width: 100px;">Bookmarked</th>
|
||||
<th style="width: 26vw;">Snapshot ({{object_list|length}})</th>
|
||||
<th style="width: 140px">Files</th>
|
||||
<th style="width: 16vw;white-space:nowrap;overflow-x:hidden;">Original URL</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for link in object_list %}
|
||||
<tr>
|
||||
<td title="{{link.timestamp}}">{{link.added}}</td>
|
||||
<td class="title-col">
|
||||
{% if link.is_archived %}
|
||||
<a href="archive/{{link.timestamp}}/index.html"><img src="archive/{{link.timestamp}}/favicon.ico" class="link-favicon" decoding="async"></a>
|
||||
{% else %}
|
||||
<a href="archive/{{link.timestamp}}/index.html"><img src="{% static 'spinner.gif' %}" class="link-favicon" decoding="async"></a>
|
||||
{% endif %}
|
||||
<a href="archive/{{link.timestamp}}/index.html" title="{{link.title}}">
|
||||
<span data-title-for="{{link.url}}" data-archived="{{link.is_archived}}">{{link.title|default:'Loading...'}}</span>
|
||||
<small style="float:right">{{link.tags_str}}</small>
|
||||
</a>
|
||||
</td>
|
||||
<td>
|
||||
<a href="archive/{{link.timestamp}}/index.html">📄
|
||||
<span data-number-for="{{link.url}}" title="Fetching any missing files...">{{link.icons}} <img src="{% static 'spinner.gif' %}" class="files-spinner" decoding="async"/></span>
|
||||
</a>
|
||||
</td>
|
||||
<td style="text-align:left"><a href="{{link.url}}">{{link.url}}</a></td>
|
||||
</tr>
|
||||
{% include 'main_index_row.html' with link=link %}
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
|
@ -59,6 +40,12 @@
|
|||
<a href="{% url 'public-index' %}?page={{ page_obj.paginator.num_pages }}">last »</a>
|
||||
{% endif %}
|
||||
</span>
|
||||
<br>
|
||||
|
||||
{% if page_obj.has_next %}
|
||||
<a href="{% url 'public-index' %}?page={{ page_obj.next_page_number }}">next </a>
|
||||
<a href="{% url 'public-index' %}?page={{ page_obj.paginator.num_pages }}">last »</a>
|
||||
{% endif %}
|
||||
</span>
|
||||
<br>
|
||||
</center>
|
||||
{% endblock %}
|
||||
{% endblock %}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<title>$title</title>
|
||||
<title>{{title}}</title>
|
||||
<meta charset="utf-8" name="viewport" content="width=device-width, initial-scale=1">
|
||||
<style>
|
||||
html, body {
|
||||
|
@ -249,13 +249,13 @@
|
|||
<div class="col-lg-8">
|
||||
<img src="favicon.ico" alt="Favicon">
|
||||
|
||||
$title
|
||||
{{title}}
|
||||
|
||||
<a href="#" class="header-toggle">▾</a>
|
||||
<br/>
|
||||
<small>
|
||||
<a href="$url" class="header-url" title="$url">
|
||||
$url_str
|
||||
<a href="{{url}}" class="header-url" title="{{url}}">
|
||||
{{url_str}}
|
||||
</a>
|
||||
</small>
|
||||
</div>
|
||||
|
@ -266,41 +266,41 @@
|
|||
<div class="col-lg-4">
|
||||
<div title="Date bookmarked or imported" class="info-chunk">
|
||||
<h5>Added</h5>
|
||||
$bookmarked_date
|
||||
{{bookmarked_date}}
|
||||
</div>
|
||||
<div title="Date first archived" class="info-chunk">
|
||||
<h5>First Archived</h5>
|
||||
$oldest_archive_date
|
||||
{{oldest_archive_date}}
|
||||
</div>
|
||||
<div title="Date last checked" class="info-chunk">
|
||||
<h5>Last Checked</h5>
|
||||
$updated_date
|
||||
{{updated_date}}
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-lg-4">
|
||||
<div class="info-chunk">
|
||||
<h5>Type</h5>
|
||||
<div class="badge badge-default">$extension</div>
|
||||
<div class="badge badge-default">{{extension}}</div>
|
||||
</div>
|
||||
<div class="info-chunk">
|
||||
<h5>Tags</h5>
|
||||
<div class="badge badge-warning">$tags</div>
|
||||
<div class="badge badge-warning">{{tags}}</div>
|
||||
</div>
|
||||
<div class="info-chunk">
|
||||
<h5>Status</h5>
|
||||
<div class="badge badge-$status_color">$status</div>
|
||||
<div class="badge badge-{{status_color}}">{{status}}</div>
|
||||
</div>
|
||||
<div class="info-chunk">
|
||||
<h5>Saved</h5>
|
||||
✅ $num_outputs
|
||||
✅ {{num_outputs}}
|
||||
</div>
|
||||
<div class="info-chunk">
|
||||
<h5>Errors</h5>
|
||||
❌ $num_failures
|
||||
❌ {{num_failures}}
|
||||
</div>
|
||||
<div class="info-chunk">
|
||||
<h5>Size</h5>
|
||||
$size
|
||||
{{size}}
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-lg-4">
|
||||
|
@ -318,97 +318,97 @@
|
|||
<div class="row header-bottom-frames">
|
||||
<div class="col-lg-3">
|
||||
<div class="card selected-card">
|
||||
<iframe class="card-img-top" src="$archive_url" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
|
||||
<iframe class="card-img-top" src="{{archive_url}}" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
|
||||
<div class="card-body">
|
||||
<a href="$archive_url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
|
||||
<a href="{{archive_url}}" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
|
||||
<img src="../../static/external.png" class="external"/>
|
||||
</a>
|
||||
<a href="$archive_url" target="preview"><h4 class="card-title">Wget > WARC</h4></a>
|
||||
<p class="card-text">archive/$domain</p>
|
||||
<a href="{{archive_url}}" target="preview"><h4 class="card-title">Wget > WARC</h4></a>
|
||||
<p class="card-text">archive/{{domain}}</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-lg-3">
|
||||
<div class="card">
|
||||
<iframe class="card-img-top" src="$singlefile_path" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
|
||||
<iframe class="card-img-top" src="{{singlefile_path}}" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
|
||||
<div class="card-body">
|
||||
<a href="$singlefile_path" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
|
||||
<a href="{{singlefile_path}}" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
|
||||
<img src="../../static/external.png" class="external"/>
|
||||
</a>
|
||||
<a href="$singlefile_path" target="preview"><h4 class="card-title">Chrome > SingleFile</h4></a>
|
||||
<a href="{{singlefile_path}}" target="preview"><h4 class="card-title">Chrome > SingleFile</h4></a>
|
||||
<p class="card-text">archive/singlefile.html</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-lg-3">
|
||||
<div class="card">
|
||||
<iframe class="card-img-top" src="$archive_org_path" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
|
||||
<iframe class="card-img-top" src="{{archive_org_path}}" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
|
||||
<div class="card-body">
|
||||
<a href="$archive_org_path" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
|
||||
<a href="{{archive_org_path}}" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
|
||||
<img src="../../static/external.png" class="external"/>
|
||||
</a>
|
||||
<a href="$archive_org_path" target="preview"><h4 class="card-title">Archive.Org</h4></a>
|
||||
<a href="{{archive_org_path}}" target="preview"><h4 class="card-title">Archive.Org</h4></a>
|
||||
<p class="card-text">web.archive.org/web/...</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-lg-3">
|
||||
<div class="card">
|
||||
<iframe class="card-img-top" src="$url" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
|
||||
<iframe class="card-img-top" src="{{url}}" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
|
||||
<div class="card-body">
|
||||
<a href="$url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
|
||||
<a href="{{url}}" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
|
||||
<img src="../../static/external.png" class="external"/>
|
||||
</a>
|
||||
<a href="$url" target="preview"><h4 class="card-title">Original</h4></a>
|
||||
<p class="card-text">$domain</p>
|
||||
<a href="{{url}}" target="preview"><h4 class="card-title">Original</h4></a>
|
||||
<p class="card-text">{{domain}}</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<br/>
|
||||
<div class="col-lg-3">
|
||||
<div class="card">
|
||||
<iframe class="card-img-top pdf-frame" src="$pdf_path" scrolling="no"></iframe>
|
||||
<iframe class="card-img-top pdf-frame" src="{{pdf_path}}" scrolling="no"></iframe>
|
||||
<div class="card-body">
|
||||
<a href="$pdf_path" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
|
||||
<a href="{{pdf_path}}" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
|
||||
<img src="../../static/external.png" class="external"/>
|
||||
</a>
|
||||
<a href="$pdf_path" target="preview" id="pdf-btn"><h4 class="card-title">Chrome > PDF</h4></a>
|
||||
<a href="{{pdf_path}}" target="preview" id="pdf-btn"><h4 class="card-title">Chrome > PDF</h4></a>
|
||||
<p class="card-text">archive/output.pdf</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-lg-3">
|
||||
<div class="card">
|
||||
<img class="card-img-top screenshot" src="$screenshot_path">
|
||||
<img class="card-img-top screenshot" src="{{screenshot_path}}">
|
||||
<div class="card-body">
|
||||
<a href="$screenshot_path" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
|
||||
<a href="{{screenshot_path}}" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
|
||||
<img src="../../static/external.png" class="external"/>
|
||||
</a>
|
||||
<a href="$screenshot_path" target="preview"><h4 class="card-title">Chrome > Screenshot</h4></a>
|
||||
<a href="{{screenshot_path}}" target="preview"><h4 class="card-title">Chrome > Screenshot</h4></a>
|
||||
<p class="card-text">archive/screenshot.png</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-lg-3">
|
||||
<div class="card">
|
||||
<iframe class="card-img-top" src="$dom_path" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
|
||||
<iframe class="card-img-top" src="{{dom_path}}" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
|
||||
<div class="card-body">
|
||||
<a href="$dom_path" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
|
||||
<a href="{{dom_path}}" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
|
||||
<img src="../../static/external.png" class="external"/>
|
||||
</a>
|
||||
<a href="$dom_path" target="preview"><h4 class="card-title">Chrome > HTML</h4></a>
|
||||
<a href="{{dom_path}}" target="preview"><h4 class="card-title">Chrome > HTML</h4></a>
|
||||
<p class="card-text">archive/output.html</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-lg-3">
|
||||
<div class="card">
|
||||
<iframe class="card-img-top" src="$readability_path" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
|
||||
<iframe class="card-img-top" src="{{readability_path}}" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
|
||||
<div class="card-body">
|
||||
<a href="$readability_path" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
|
||||
<a href="{{readability_path}}" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
|
||||
<img src="../../static/external.png" class="external"/>
|
||||
</a>
|
||||
<a href="$readability_path" target="preview"><h4 class="card-title">Readability</h4></a>
|
||||
<a href="{{readability_path}}" target="preview"><h4 class="card-title">Readability</h4></a>
|
||||
<p class="card-text">archive/readability/...</p>
|
||||
</div>
|
||||
</div>
|
||||
|
@ -416,12 +416,12 @@
|
|||
<br/>
|
||||
<div class="col-lg-3">
|
||||
<div class="card">
|
||||
<iframe class="card-img-top" src="$mercury_path" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
|
||||
<iframe class="card-img-top" src="{{mercury_path}}" sandbox="allow-same-origin allow-scripts allow-forms" scrolling="no"></iframe>
|
||||
<div class="card-body">
|
||||
<a href="$mercury_path" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
|
||||
<a href="{{mercury_path}}" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
|
||||
<img src="../../static/external.png" class="external"/>
|
||||
</a>
|
||||
<a href="$mercury_path" target="preview"><h4 class="card-title">mercury</h4></a>
|
||||
<a href="{{mercury_path}}" target="preview"><h4 class="card-title">mercury</h4></a>
|
||||
<p class="card-text">archive/mercury/...</p>
|
||||
</div>
|
||||
</div>
|
||||
|
@ -429,7 +429,7 @@
|
|||
</div>
|
||||
</div>
|
||||
</header>
|
||||
<iframe sandbox="allow-same-origin allow-scripts allow-forms" class="full-page-iframe" src="$archive_url" name="preview"></iframe>
|
||||
<iframe sandbox="allow-same-origin allow-scripts allow-forms" class="full-page-iframe" src="{{archive_url}}" name="preview"></iframe>
|
||||
|
||||
<script
|
||||
src="https://code.jquery.com/jquery-3.2.1.slim.min.js"
|
|
@ -161,12 +161,6 @@
|
|||
.in-progress {
|
||||
display: none;
|
||||
}
|
||||
body[data-status~=finished] .files-spinner {
|
||||
display: none;
|
||||
}
|
||||
/*body[data-status~=running] .in-progress {
|
||||
display: inline-block;
|
||||
}*/
|
||||
tr td a.favicon img {
|
||||
padding-left: 6px;
|
||||
padding-right: 12px;
|
||||
|
@ -210,7 +204,7 @@
|
|||
});
|
||||
</script>
|
||||
</head>
|
||||
<body data-status="finished">
|
||||
<body>
|
||||
<header>
|
||||
<div class="header-top container-fluid">
|
||||
<div class="row nav">
|
||||
|
@ -228,6 +222,7 @@
|
|||
</div>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<table id="table-bookmarks">
|
||||
<thead>
|
||||
<tr>
|
||||
|
@ -239,26 +234,7 @@
|
|||
</thead>
|
||||
<tbody>
|
||||
{% for link in links %}
|
||||
<tr>
|
||||
<td title="{{link.timestamp}}">{{link.bookmarked_date}}</td>
|
||||
<td class="title-col">
|
||||
{% if link.is_archived %}
|
||||
<a href="archive/{{link.timestamp}}/index.html"><img src="archive/{{link.timestamp}}/favicon.ico" class="link-favicon" decoding="async"></a>
|
||||
{% else %}
|
||||
<a href="archive/{{link.timestamp}}/index.html"><img src="{% static 'spinner.gif' %}" class="link-favicon" decoding="async"></a>
|
||||
{% endif %}
|
||||
<a href="archive/{{link.timestamp}}/{{link.canonical_outputs.wget_path}}" title="{{link.title}}">
|
||||
<span data-title-for="{{link.url}}" data-archived="{{link.is_archived}}">{{link.title|default:'Loading...'}}</span>
|
||||
<small style="float:right">{{link.tags|default:''}}</small>
|
||||
</a>
|
||||
</td>
|
||||
<td>
|
||||
<a href="archive/{{link.timestamp}}/index.html">📄
|
||||
<span data-number-for="{{link.url}}" title="Fetching any missing files...">{{link.num_outputs}} <img src="{% static 'spinner.gif' %}" class="files-spinner" decoding="async"/></span>
|
||||
</a>
|
||||
</td>
|
||||
<td style="text-align:left"><a href="{{link.url}}">{{link.url}}</a></td>
|
||||
</tr>
|
||||
{% include 'main_index_row.html' with link=link %}
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
|
|
|
@ -4,17 +4,21 @@
|
|||
<title>Archived Sites</title>
|
||||
<meta charset="utf-8" name="viewport" content="width=device-width, initial-scale=1">
|
||||
</head>
|
||||
<body data-status="$status">
|
||||
<body data-status="{{status}}">
|
||||
<table id="table-bookmarks">
|
||||
<thead>
|
||||
<tr class="thead-tr">
|
||||
<th style="width: 100px;">Bookmarked</th>
|
||||
<th style="width: 26vw;">Saved Link ($num_links)</th>
|
||||
<th style="width: 26vw;">Saved Link ({{num_links}})</th>
|
||||
<th style="width: 50px">Files</th>
|
||||
<th style="width: 16vw;white-space:nowrap;overflow-x:hidden;">Original URL</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>$rows</tbody>
|
||||
<tbody>
|
||||
{% for link in links %}
|
||||
{% include "main_index_row.html" with link=link %}
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
22
archivebox/themes/default/main_index_row.html
Normal file
|
@ -0,0 +1,22 @@
|
|||
{% load static %}
|
||||
|
||||
<tr>
|
||||
<td title="{{link.timestamp}}"> {% if link.bookmarked_date %} {{ link.bookmarked_date }} {% else %} {{ link.added }} {% endif %} </td>
|
||||
<td class="title-col">
|
||||
{% if link.is_archived %}
|
||||
<a href="archive/{{link.timestamp}}/index.html"><img src="archive/{{link.timestamp}}/favicon.ico" class="link-favicon" decoding="async"></a>
|
||||
{% else %}
|
||||
<a href="archive/{{link.timestamp}}/index.html"><img src="{% static 'spinner.gif' %}" class="link-favicon" decoding="async"></a>
|
||||
{% endif %}
|
||||
<a href="archive/{{link.timestamp}}/{{link.canonical_outputs.wget_path}}" title="{{link.title}}">
|
||||
<span data-title-for="{{link.url}}" data-archived="{{link.is_archived}}">{{link.title|default:'Loading...'}}</span>
|
||||
<small style="float:right">{% if link.tags_str != None %} {{link.tags_str|default:''}} {% else %} {{ link.tags|default:'' }} {% endif %}</small>
|
||||
</a>
|
||||
</td>
|
||||
<td>
|
||||
<a href="archive/{{link.timestamp}}/index.html">📄
|
||||
<span data-number-for="{{link.url}}" title="Fetching any missing files...">{% if link.icons %} {{link.icons}} {% else %} {{ link.num_outputs}} {% endif %}<img src="{% static 'spinner.gif' %}" class="files-spinner" decoding="async"/></span>
|
||||
</a>
|
||||
</td>
|
||||
<td style="text-align:left"><a href="{{link.url}}">{{link.url}}</a></td>
|
||||
</tr>
|
|
@ -91,6 +91,7 @@ body.model-snapshot.change-list #content .object-tools {
|
|||
padding: 0px;
|
||||
background: none;
|
||||
margin-right: 0px;
|
||||
width: auto;
|
||||
}
|
||||
|
||||
#content #changelist .actions .button {
|
||||
|
|
Before Width: | Height: | Size: 15 KiB |
|
@ -127,12 +127,6 @@
|
|||
.in-progress {
|
||||
display: none;
|
||||
}
|
||||
body[data-status~=finished] .files-spinner {
|
||||
display: none;
|
||||
}
|
||||
/*body[data-status~=running] .in-progress {
|
||||
display: inline-block;
|
||||
}*/
|
||||
tr td a.favicon img {
|
||||
padding-left: 6px;
|
||||
padding-right: 12px;
|
||||
|
@ -176,7 +170,7 @@
|
|||
});
|
||||
</script>
|
||||
</head>
|
||||
<body data-status="$status">
|
||||
<body>
|
||||
<header>
|
||||
<div class="header-top container-fluid">
|
||||
<div class="row nav">
|
||||
|
@ -198,7 +192,7 @@
|
|||
<thead>
|
||||
<tr class="thead-tr">
|
||||
<th style="width: 100px;">Bookmarked</th>
|
||||
<th style="width: 26vw;">Saved Link ($num_links)</th>
|
||||
<th style="width: 26vw;">Snapshot ($num_links)</th>
|
||||
<th style="width: 50px">Files</th>
|
||||
<th style="width: 16vw;white-space:nowrap;overflow-x:hidden;">Original URL</th>
|
||||
</tr>
|
||||
|
@ -209,9 +203,8 @@
|
|||
<br/>
|
||||
<center>
|
||||
<small>
|
||||
Archive created using <a href="https://github.com/ArchiveBox/ArchiveBox" title="Github">ArchiveBox</a>
|
||||
version <a href="https://github.com/ArchiveBox/ArchiveBox/tree/v$version" title="Git commit">v$version</a> |
|
||||
Download index as <a href="index.json" title="JSON summary of archived links.">JSON</a>
|
||||
Created using <a href="https://github.com/ArchiveBox/ArchiveBox" title="Github">ArchiveBox</a>
|
||||
version <a href="https://github.com/ArchiveBox/ArchiveBox/releases" title="Releases">v$version</a>.
|
||||
<br/><br/>
|
||||
$footer_info
|
||||
</small>
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
</td>
|
||||
<td>
|
||||
<a href="$archive_path/index.html">📄
|
||||
<span data-number-for="$url" title="Fetching any missing files...">$num_outputs <img src="static/spinner.gif" class="files-spinner" decoding="async"/></span>
|
||||
<span data-number-for="$url" title="Number of extractor outputs">$num_outputs</span>
|
||||
</a>
|
||||
</td>
|
||||
<td style="text-align:left"><a href="$url">$url</a></td>
|
||||
|
|
|
@ -1,2 +0,0 @@
|
|||
User-agent: *
|
||||
Disallow: /
|
Before Width: | Height: | Size: 17 KiB |
Before Width: | Height: | Size: 1.6 KiB |
|
@ -1,166 +0,0 @@
|
|||
/*!
|
||||
DataTables 1.10.19
|
||||
©2008-2018 SpryMedia Ltd - datatables.net/license
|
||||
*/
|
||||
(function(h){"function"===typeof define&&define.amd?define(["jquery"],function(E){return h(E,window,document)}):"object"===typeof exports?module.exports=function(E,H){E||(E=window);H||(H="undefined"!==typeof window?require("jquery"):require("jquery")(E));return h(H,E,E.document)}:h(jQuery,window,document)})(function(h,E,H,k){function Z(a){var b,c,d={};h.each(a,function(e){if((b=e.match(/^([^A-Z]+?)([A-Z])/))&&-1!=="a aa ai ao as b fn i m o s ".indexOf(b[1]+" "))c=e.replace(b[0],b[2].toLowerCase()),
|
||||
d[c]=e,"o"===b[1]&&Z(a[e])});a._hungarianMap=d}function J(a,b,c){a._hungarianMap||Z(a);var d;h.each(b,function(e){d=a._hungarianMap[e];if(d!==k&&(c||b[d]===k))"o"===d.charAt(0)?(b[d]||(b[d]={}),h.extend(!0,b[d],b[e]),J(a[d],b[d],c)):b[d]=b[e]})}function Ca(a){var b=n.defaults.oLanguage,c=b.sDecimal;c&&Da(c);if(a){var d=a.sZeroRecords;!a.sEmptyTable&&(d&&"No data available in table"===b.sEmptyTable)&&F(a,a,"sZeroRecords","sEmptyTable");!a.sLoadingRecords&&(d&&"Loading..."===b.sLoadingRecords)&&F(a,
|
||||
a,"sZeroRecords","sLoadingRecords");a.sInfoThousands&&(a.sThousands=a.sInfoThousands);(a=a.sDecimal)&&c!==a&&Da(a)}}function fb(a){A(a,"ordering","bSort");A(a,"orderMulti","bSortMulti");A(a,"orderClasses","bSortClasses");A(a,"orderCellsTop","bSortCellsTop");A(a,"order","aaSorting");A(a,"orderFixed","aaSortingFixed");A(a,"paging","bPaginate");A(a,"pagingType","sPaginationType");A(a,"pageLength","iDisplayLength");A(a,"searching","bFilter");"boolean"===typeof a.sScrollX&&(a.sScrollX=a.sScrollX?"100%":
|
||||
"");"boolean"===typeof a.scrollX&&(a.scrollX=a.scrollX?"100%":"");if(a=a.aoSearchCols)for(var b=0,c=a.length;b<c;b++)a[b]&&J(n.models.oSearch,a[b])}function gb(a){A(a,"orderable","bSortable");A(a,"orderData","aDataSort");A(a,"orderSequence","asSorting");A(a,"orderDataType","sortDataType");var b=a.aDataSort;"number"===typeof b&&!h.isArray(b)&&(a.aDataSort=[b])}function hb(a){if(!n.__browser){var b={};n.__browser=b;var c=h("<div/>").css({position:"fixed",top:0,left:-1*h(E).scrollLeft(),height:1,width:1,
|
||||
overflow:"hidden"}).append(h("<div/>").css({position:"absolute",top:1,left:1,width:100,overflow:"scroll"}).append(h("<div/>").css({width:"100%",height:10}))).appendTo("body"),d=c.children(),e=d.children();b.barWidth=d[0].offsetWidth-d[0].clientWidth;b.bScrollOversize=100===e[0].offsetWidth&&100!==d[0].clientWidth;b.bScrollbarLeft=1!==Math.round(e.offset().left);b.bBounding=c[0].getBoundingClientRect().width?!0:!1;c.remove()}h.extend(a.oBrowser,n.__browser);a.oScroll.iBarWidth=n.__browser.barWidth}
|
||||
function ib(a,b,c,d,e,f){var g,j=!1;c!==k&&(g=c,j=!0);for(;d!==e;)a.hasOwnProperty(d)&&(g=j?b(g,a[d],d,a):a[d],j=!0,d+=f);return g}function Ea(a,b){var c=n.defaults.column,d=a.aoColumns.length,c=h.extend({},n.models.oColumn,c,{nTh:b?b:H.createElement("th"),sTitle:c.sTitle?c.sTitle:b?b.innerHTML:"",aDataSort:c.aDataSort?c.aDataSort:[d],mData:c.mData?c.mData:d,idx:d});a.aoColumns.push(c);c=a.aoPreSearchCols;c[d]=h.extend({},n.models.oSearch,c[d]);ka(a,d,h(b).data())}function ka(a,b,c){var b=a.aoColumns[b],
|
||||
d=a.oClasses,e=h(b.nTh);if(!b.sWidthOrig){b.sWidthOrig=e.attr("width")||null;var f=(e.attr("style")||"").match(/width:\s*(\d+[pxem%]+)/);f&&(b.sWidthOrig=f[1])}c!==k&&null!==c&&(gb(c),J(n.defaults.column,c),c.mDataProp!==k&&!c.mData&&(c.mData=c.mDataProp),c.sType&&(b._sManualType=c.sType),c.className&&!c.sClass&&(c.sClass=c.className),c.sClass&&e.addClass(c.sClass),h.extend(b,c),F(b,c,"sWidth","sWidthOrig"),c.iDataSort!==k&&(b.aDataSort=[c.iDataSort]),F(b,c,"aDataSort"));var g=b.mData,j=S(g),i=b.mRender?
|
||||
S(b.mRender):null,c=function(a){return"string"===typeof a&&-1!==a.indexOf("@")};b._bAttrSrc=h.isPlainObject(g)&&(c(g.sort)||c(g.type)||c(g.filter));b._setter=null;b.fnGetData=function(a,b,c){var d=j(a,b,k,c);return i&&b?i(d,b,a,c):d};b.fnSetData=function(a,b,c){return N(g)(a,b,c)};"number"!==typeof g&&(a._rowReadObject=!0);a.oFeatures.bSort||(b.bSortable=!1,e.addClass(d.sSortableNone));a=-1!==h.inArray("asc",b.asSorting);c=-1!==h.inArray("desc",b.asSorting);!b.bSortable||!a&&!c?(b.sSortingClass=d.sSortableNone,
|
||||
b.sSortingClassJUI=""):a&&!c?(b.sSortingClass=d.sSortableAsc,b.sSortingClassJUI=d.sSortJUIAscAllowed):!a&&c?(b.sSortingClass=d.sSortableDesc,b.sSortingClassJUI=d.sSortJUIDescAllowed):(b.sSortingClass=d.sSortable,b.sSortingClassJUI=d.sSortJUI)}function $(a){if(!1!==a.oFeatures.bAutoWidth){var b=a.aoColumns;Fa(a);for(var c=0,d=b.length;c<d;c++)b[c].nTh.style.width=b[c].sWidth}b=a.oScroll;(""!==b.sY||""!==b.sX)&&la(a);r(a,null,"column-sizing",[a])}function aa(a,b){var c=ma(a,"bVisible");return"number"===
|
||||
typeof c[b]?c[b]:null}function ba(a,b){var c=ma(a,"bVisible"),c=h.inArray(b,c);return-1!==c?c:null}function V(a){var b=0;h.each(a.aoColumns,function(a,d){d.bVisible&&"none"!==h(d.nTh).css("display")&&b++});return b}function ma(a,b){var c=[];h.map(a.aoColumns,function(a,e){a[b]&&c.push(e)});return c}function Ga(a){var b=a.aoColumns,c=a.aoData,d=n.ext.type.detect,e,f,g,j,i,h,l,q,t;e=0;for(f=b.length;e<f;e++)if(l=b[e],t=[],!l.sType&&l._sManualType)l.sType=l._sManualType;else if(!l.sType){g=0;for(j=d.length;g<
|
||||
j;g++){i=0;for(h=c.length;i<h;i++){t[i]===k&&(t[i]=B(a,i,e,"type"));q=d[g](t[i],a);if(!q&&g!==d.length-1)break;if("html"===q)break}if(q){l.sType=q;break}}l.sType||(l.sType="string")}}function jb(a,b,c,d){var e,f,g,j,i,m,l=a.aoColumns;if(b)for(e=b.length-1;0<=e;e--){m=b[e];var q=m.targets!==k?m.targets:m.aTargets;h.isArray(q)||(q=[q]);f=0;for(g=q.length;f<g;f++)if("number"===typeof q[f]&&0<=q[f]){for(;l.length<=q[f];)Ea(a);d(q[f],m)}else if("number"===typeof q[f]&&0>q[f])d(l.length+q[f],m);else if("string"===
|
||||
typeof q[f]){j=0;for(i=l.length;j<i;j++)("_all"==q[f]||h(l[j].nTh).hasClass(q[f]))&&d(j,m)}}if(c){e=0;for(a=c.length;e<a;e++)d(e,c[e])}}function O(a,b,c,d){var e=a.aoData.length,f=h.extend(!0,{},n.models.oRow,{src:c?"dom":"data",idx:e});f._aData=b;a.aoData.push(f);for(var g=a.aoColumns,j=0,i=g.length;j<i;j++)g[j].sType=null;a.aiDisplayMaster.push(e);b=a.rowIdFn(b);b!==k&&(a.aIds[b]=f);(c||!a.oFeatures.bDeferRender)&&Ha(a,e,c,d);return e}function na(a,b){var c;b instanceof h||(b=h(b));return b.map(function(b,
|
||||
e){c=Ia(a,e);return O(a,c.data,e,c.cells)})}function B(a,b,c,d){var e=a.iDraw,f=a.aoColumns[c],g=a.aoData[b]._aData,j=f.sDefaultContent,i=f.fnGetData(g,d,{settings:a,row:b,col:c});if(i===k)return a.iDrawError!=e&&null===j&&(K(a,0,"Requested unknown parameter "+("function"==typeof f.mData?"{function}":"'"+f.mData+"'")+" for row "+b+", column "+c,4),a.iDrawError=e),j;if((i===g||null===i)&&null!==j&&d!==k)i=j;else if("function"===typeof i)return i.call(g);return null===i&&"display"==d?"":i}function kb(a,
|
||||
b,c,d){a.aoColumns[c].fnSetData(a.aoData[b]._aData,d,{settings:a,row:b,col:c})}function Ja(a){return h.map(a.match(/(\\.|[^\.])+/g)||[""],function(a){return a.replace(/\\\./g,".")})}function S(a){if(h.isPlainObject(a)){var b={};h.each(a,function(a,c){c&&(b[a]=S(c))});return function(a,c,f,g){var j=b[c]||b._;return j!==k?j(a,c,f,g):a}}if(null===a)return function(a){return a};if("function"===typeof a)return function(b,c,f,g){return a(b,c,f,g)};if("string"===typeof a&&(-1!==a.indexOf(".")||-1!==a.indexOf("[")||
|
||||
-1!==a.indexOf("("))){var c=function(a,b,f){var g,j;if(""!==f){j=Ja(f);for(var i=0,m=j.length;i<m;i++){f=j[i].match(ca);g=j[i].match(W);if(f){j[i]=j[i].replace(ca,"");""!==j[i]&&(a=a[j[i]]);g=[];j.splice(0,i+1);j=j.join(".");if(h.isArray(a)){i=0;for(m=a.length;i<m;i++)g.push(c(a[i],b,j))}a=f[0].substring(1,f[0].length-1);a=""===a?g:g.join(a);break}else if(g){j[i]=j[i].replace(W,"");a=a[j[i]]();continue}if(null===a||a[j[i]]===k)return k;a=a[j[i]]}}return a};return function(b,e){return c(b,e,a)}}return function(b){return b[a]}}
|
||||
function N(a){if(h.isPlainObject(a))return N(a._);if(null===a)return function(){};if("function"===typeof a)return function(b,d,e){a(b,"set",d,e)};if("string"===typeof a&&(-1!==a.indexOf(".")||-1!==a.indexOf("[")||-1!==a.indexOf("("))){var b=function(a,d,e){var e=Ja(e),f;f=e[e.length-1];for(var g,j,i=0,m=e.length-1;i<m;i++){g=e[i].match(ca);j=e[i].match(W);if(g){e[i]=e[i].replace(ca,"");a[e[i]]=[];f=e.slice();f.splice(0,i+1);g=f.join(".");if(h.isArray(d)){j=0;for(m=d.length;j<m;j++)f={},b(f,d[j],g),
|
||||
a[e[i]].push(f)}else a[e[i]]=d;return}j&&(e[i]=e[i].replace(W,""),a=a[e[i]](d));if(null===a[e[i]]||a[e[i]]===k)a[e[i]]={};a=a[e[i]]}if(f.match(W))a[f.replace(W,"")](d);else a[f.replace(ca,"")]=d};return function(c,d){return b(c,d,a)}}return function(b,d){b[a]=d}}function Ka(a){return D(a.aoData,"_aData")}function oa(a){a.aoData.length=0;a.aiDisplayMaster.length=0;a.aiDisplay.length=0;a.aIds={}}function pa(a,b,c){for(var d=-1,e=0,f=a.length;e<f;e++)a[e]==b?d=e:a[e]>b&&a[e]--; -1!=d&&c===k&&a.splice(d,
|
||||
1)}function da(a,b,c,d){var e=a.aoData[b],f,g=function(c,d){for(;c.childNodes.length;)c.removeChild(c.firstChild);c.innerHTML=B(a,b,d,"display")};if("dom"===c||(!c||"auto"===c)&&"dom"===e.src)e._aData=Ia(a,e,d,d===k?k:e._aData).data;else{var j=e.anCells;if(j)if(d!==k)g(j[d],d);else{c=0;for(f=j.length;c<f;c++)g(j[c],c)}}e._aSortData=null;e._aFilterData=null;g=a.aoColumns;if(d!==k)g[d].sType=null;else{c=0;for(f=g.length;c<f;c++)g[c].sType=null;La(a,e)}}function Ia(a,b,c,d){var e=[],f=b.firstChild,g,
|
||||
j,i=0,m,l=a.aoColumns,q=a._rowReadObject,d=d!==k?d:q?{}:[],t=function(a,b){if("string"===typeof a){var c=a.indexOf("@");-1!==c&&(c=a.substring(c+1),N(a)(d,b.getAttribute(c)))}},G=function(a){if(c===k||c===i)j=l[i],m=h.trim(a.innerHTML),j&&j._bAttrSrc?(N(j.mData._)(d,m),t(j.mData.sort,a),t(j.mData.type,a),t(j.mData.filter,a)):q?(j._setter||(j._setter=N(j.mData)),j._setter(d,m)):d[i]=m;i++};if(f)for(;f;){g=f.nodeName.toUpperCase();if("TD"==g||"TH"==g)G(f),e.push(f);f=f.nextSibling}else{e=b.anCells;
|
||||
f=0;for(g=e.length;f<g;f++)G(e[f])}if(b=b.firstChild?b:b.nTr)(b=b.getAttribute("id"))&&N(a.rowId)(d,b);return{data:d,cells:e}}function Ha(a,b,c,d){var e=a.aoData[b],f=e._aData,g=[],j,i,m,l,q;if(null===e.nTr){j=c||H.createElement("tr");e.nTr=j;e.anCells=g;j._DT_RowIndex=b;La(a,e);l=0;for(q=a.aoColumns.length;l<q;l++){m=a.aoColumns[l];i=c?d[l]:H.createElement(m.sCellType);i._DT_CellIndex={row:b,column:l};g.push(i);if((!c||m.mRender||m.mData!==l)&&(!h.isPlainObject(m.mData)||m.mData._!==l+".display"))i.innerHTML=
|
||||
B(a,b,l,"display");m.sClass&&(i.className+=" "+m.sClass);m.bVisible&&!c?j.appendChild(i):!m.bVisible&&c&&i.parentNode.removeChild(i);m.fnCreatedCell&&m.fnCreatedCell.call(a.oInstance,i,B(a,b,l),f,b,l)}r(a,"aoRowCreatedCallback",null,[j,f,b,g])}e.nTr.setAttribute("role","row")}function La(a,b){var c=b.nTr,d=b._aData;if(c){var e=a.rowIdFn(d);e&&(c.id=e);d.DT_RowClass&&(e=d.DT_RowClass.split(" "),b.__rowc=b.__rowc?qa(b.__rowc.concat(e)):e,h(c).removeClass(b.__rowc.join(" ")).addClass(d.DT_RowClass));
|
||||
d.DT_RowAttr&&h(c).attr(d.DT_RowAttr);d.DT_RowData&&h(c).data(d.DT_RowData)}}function lb(a){var b,c,d,e,f,g=a.nTHead,j=a.nTFoot,i=0===h("th, td",g).length,m=a.oClasses,l=a.aoColumns;i&&(e=h("<tr/>").appendTo(g));b=0;for(c=l.length;b<c;b++)f=l[b],d=h(f.nTh).addClass(f.sClass),i&&d.appendTo(e),a.oFeatures.bSort&&(d.addClass(f.sSortingClass),!1!==f.bSortable&&(d.attr("tabindex",a.iTabIndex).attr("aria-controls",a.sTableId),Ma(a,f.nTh,b))),f.sTitle!=d[0].innerHTML&&d.html(f.sTitle),Na(a,"header")(a,d,
|
||||
f,m);i&&ea(a.aoHeader,g);h(g).find(">tr").attr("role","row");h(g).find(">tr>th, >tr>td").addClass(m.sHeaderTH);h(j).find(">tr>th, >tr>td").addClass(m.sFooterTH);if(null!==j){a=a.aoFooter[0];b=0;for(c=a.length;b<c;b++)f=l[b],f.nTf=a[b].cell,f.sClass&&h(f.nTf).addClass(f.sClass)}}function fa(a,b,c){var d,e,f,g=[],j=[],i=a.aoColumns.length,m;if(b){c===k&&(c=!1);d=0;for(e=b.length;d<e;d++){g[d]=b[d].slice();g[d].nTr=b[d].nTr;for(f=i-1;0<=f;f--)!a.aoColumns[f].bVisible&&!c&&g[d].splice(f,1);j.push([])}d=
|
||||
0;for(e=g.length;d<e;d++){if(a=g[d].nTr)for(;f=a.firstChild;)a.removeChild(f);f=0;for(b=g[d].length;f<b;f++)if(m=i=1,j[d][f]===k){a.appendChild(g[d][f].cell);for(j[d][f]=1;g[d+i]!==k&&g[d][f].cell==g[d+i][f].cell;)j[d+i][f]=1,i++;for(;g[d][f+m]!==k&&g[d][f].cell==g[d][f+m].cell;){for(c=0;c<i;c++)j[d+c][f+m]=1;m++}h(g[d][f].cell).attr("rowspan",i).attr("colspan",m)}}}}function P(a){var b=r(a,"aoPreDrawCallback","preDraw",[a]);if(-1!==h.inArray(!1,b))C(a,!1);else{var b=[],c=0,d=a.asStripeClasses,e=
|
||||
d.length,f=a.oLanguage,g=a.iInitDisplayStart,j="ssp"==y(a),i=a.aiDisplay;a.bDrawing=!0;g!==k&&-1!==g&&(a._iDisplayStart=j?g:g>=a.fnRecordsDisplay()?0:g,a.iInitDisplayStart=-1);var g=a._iDisplayStart,m=a.fnDisplayEnd();if(a.bDeferLoading)a.bDeferLoading=!1,a.iDraw++,C(a,!1);else if(j){if(!a.bDestroying&&!mb(a))return}else a.iDraw++;if(0!==i.length){f=j?a.aoData.length:m;for(j=j?0:g;j<f;j++){var l=i[j],q=a.aoData[l];null===q.nTr&&Ha(a,l);var t=q.nTr;if(0!==e){var G=d[c%e];q._sRowStripe!=G&&(h(t).removeClass(q._sRowStripe).addClass(G),
|
||||
q._sRowStripe=G)}r(a,"aoRowCallback",null,[t,q._aData,c,j,l]);b.push(t);c++}}else c=f.sZeroRecords,1==a.iDraw&&"ajax"==y(a)?c=f.sLoadingRecords:f.sEmptyTable&&0===a.fnRecordsTotal()&&(c=f.sEmptyTable),b[0]=h("<tr/>",{"class":e?d[0]:""}).append(h("<td />",{valign:"top",colSpan:V(a),"class":a.oClasses.sRowEmpty}).html(c))[0];r(a,"aoHeaderCallback","header",[h(a.nTHead).children("tr")[0],Ka(a),g,m,i]);r(a,"aoFooterCallback","footer",[h(a.nTFoot).children("tr")[0],Ka(a),g,m,i]);d=h(a.nTBody);d.children().detach();
|
||||
d.append(h(b));r(a,"aoDrawCallback","draw",[a]);a.bSorted=!1;a.bFiltered=!1;a.bDrawing=!1}}function T(a,b){var c=a.oFeatures,d=c.bFilter;c.bSort&&nb(a);d?ga(a,a.oPreviousSearch):a.aiDisplay=a.aiDisplayMaster.slice();!0!==b&&(a._iDisplayStart=0);a._drawHold=b;P(a);a._drawHold=!1}function ob(a){var b=a.oClasses,c=h(a.nTable),c=h("<div/>").insertBefore(c),d=a.oFeatures,e=h("<div/>",{id:a.sTableId+"_wrapper","class":b.sWrapper+(a.nTFoot?"":" "+b.sNoFooter)});a.nHolding=c[0];a.nTableWrapper=e[0];a.nTableReinsertBefore=
|
||||
a.nTable.nextSibling;for(var f=a.sDom.split(""),g,j,i,m,l,q,k=0;k<f.length;k++){g=null;j=f[k];if("<"==j){i=h("<div/>")[0];m=f[k+1];if("'"==m||'"'==m){l="";for(q=2;f[k+q]!=m;)l+=f[k+q],q++;"H"==l?l=b.sJUIHeader:"F"==l&&(l=b.sJUIFooter);-1!=l.indexOf(".")?(m=l.split("."),i.id=m[0].substr(1,m[0].length-1),i.className=m[1]):"#"==l.charAt(0)?i.id=l.substr(1,l.length-1):i.className=l;k+=q}e.append(i);e=h(i)}else if(">"==j)e=e.parent();else if("l"==j&&d.bPaginate&&d.bLengthChange)g=pb(a);else if("f"==j&&
|
||||
d.bFilter)g=qb(a);else if("r"==j&&d.bProcessing)g=rb(a);else if("t"==j)g=sb(a);else if("i"==j&&d.bInfo)g=tb(a);else if("p"==j&&d.bPaginate)g=ub(a);else if(0!==n.ext.feature.length){i=n.ext.feature;q=0;for(m=i.length;q<m;q++)if(j==i[q].cFeature){g=i[q].fnInit(a);break}}g&&(i=a.aanFeatures,i[j]||(i[j]=[]),i[j].push(g),e.append(g))}c.replaceWith(e);a.nHolding=null}function ea(a,b){var c=h(b).children("tr"),d,e,f,g,j,i,m,l,q,k;a.splice(0,a.length);f=0;for(i=c.length;f<i;f++)a.push([]);f=0;for(i=c.length;f<
|
||||
i;f++){d=c[f];for(e=d.firstChild;e;){if("TD"==e.nodeName.toUpperCase()||"TH"==e.nodeName.toUpperCase()){l=1*e.getAttribute("colspan");q=1*e.getAttribute("rowspan");l=!l||0===l||1===l?1:l;q=!q||0===q||1===q?1:q;g=0;for(j=a[f];j[g];)g++;m=g;k=1===l?!0:!1;for(j=0;j<l;j++)for(g=0;g<q;g++)a[f+g][m+j]={cell:e,unique:k},a[f+g].nTr=d}e=e.nextSibling}}}function ra(a,b,c){var d=[];c||(c=a.aoHeader,b&&(c=[],ea(c,b)));for(var b=0,e=c.length;b<e;b++)for(var f=0,g=c[b].length;f<g;f++)if(c[b][f].unique&&(!d[f]||
|
||||
!a.bSortCellsTop))d[f]=c[b][f].cell;return d}function sa(a,b,c){r(a,"aoServerParams","serverParams",[b]);if(b&&h.isArray(b)){var d={},e=/(.*?)\[\]$/;h.each(b,function(a,b){var c=b.name.match(e);c?(c=c[0],d[c]||(d[c]=[]),d[c].push(b.value)):d[b.name]=b.value});b=d}var f,g=a.ajax,j=a.oInstance,i=function(b){r(a,null,"xhr",[a,b,a.jqXHR]);c(b)};if(h.isPlainObject(g)&&g.data){f=g.data;var m="function"===typeof f?f(b,a):f,b="function"===typeof f&&m?m:h.extend(!0,b,m);delete g.data}m={data:b,success:function(b){var c=
|
||||
b.error||b.sError;c&&K(a,0,c);a.json=b;i(b)},dataType:"json",cache:!1,type:a.sServerMethod,error:function(b,c){var d=r(a,null,"xhr",[a,null,a.jqXHR]);-1===h.inArray(!0,d)&&("parsererror"==c?K(a,0,"Invalid JSON response",1):4===b.readyState&&K(a,0,"Ajax error",7));C(a,!1)}};a.oAjaxData=b;r(a,null,"preXhr",[a,b]);a.fnServerData?a.fnServerData.call(j,a.sAjaxSource,h.map(b,function(a,b){return{name:b,value:a}}),i,a):a.sAjaxSource||"string"===typeof g?a.jqXHR=h.ajax(h.extend(m,{url:g||a.sAjaxSource})):
|
||||
"function"===typeof g?a.jqXHR=g.call(j,b,i,a):(a.jqXHR=h.ajax(h.extend(m,g)),g.data=f)}function mb(a){return a.bAjaxDataGet?(a.iDraw++,C(a,!0),sa(a,vb(a),function(b){wb(a,b)}),!1):!0}function vb(a){var b=a.aoColumns,c=b.length,d=a.oFeatures,e=a.oPreviousSearch,f=a.aoPreSearchCols,g,j=[],i,m,l,k=X(a);g=a._iDisplayStart;i=!1!==d.bPaginate?a._iDisplayLength:-1;var t=function(a,b){j.push({name:a,value:b})};t("sEcho",a.iDraw);t("iColumns",c);t("sColumns",D(b,"sName").join(","));t("iDisplayStart",g);t("iDisplayLength",
|
||||
i);var G={draw:a.iDraw,columns:[],order:[],start:g,length:i,search:{value:e.sSearch,regex:e.bRegex}};for(g=0;g<c;g++)m=b[g],l=f[g],i="function"==typeof m.mData?"function":m.mData,G.columns.push({data:i,name:m.sName,searchable:m.bSearchable,orderable:m.bSortable,search:{value:l.sSearch,regex:l.bRegex}}),t("mDataProp_"+g,i),d.bFilter&&(t("sSearch_"+g,l.sSearch),t("bRegex_"+g,l.bRegex),t("bSearchable_"+g,m.bSearchable)),d.bSort&&t("bSortable_"+g,m.bSortable);d.bFilter&&(t("sSearch",e.sSearch),t("bRegex",
|
||||
e.bRegex));d.bSort&&(h.each(k,function(a,b){G.order.push({column:b.col,dir:b.dir});t("iSortCol_"+a,b.col);t("sSortDir_"+a,b.dir)}),t("iSortingCols",k.length));b=n.ext.legacy.ajax;return null===b?a.sAjaxSource?j:G:b?j:G}function wb(a,b){var c=ta(a,b),d=b.sEcho!==k?b.sEcho:b.draw,e=b.iTotalRecords!==k?b.iTotalRecords:b.recordsTotal,f=b.iTotalDisplayRecords!==k?b.iTotalDisplayRecords:b.recordsFiltered;if(d){if(1*d<a.iDraw)return;a.iDraw=1*d}oa(a);a._iRecordsTotal=parseInt(e,10);a._iRecordsDisplay=parseInt(f,
|
||||
10);d=0;for(e=c.length;d<e;d++)O(a,c[d]);a.aiDisplay=a.aiDisplayMaster.slice();a.bAjaxDataGet=!1;P(a);a._bInitComplete||ua(a,b);a.bAjaxDataGet=!0;C(a,!1)}function ta(a,b){var c=h.isPlainObject(a.ajax)&&a.ajax.dataSrc!==k?a.ajax.dataSrc:a.sAjaxDataProp;return"data"===c?b.aaData||b[c]:""!==c?S(c)(b):b}function qb(a){var b=a.oClasses,c=a.sTableId,d=a.oLanguage,e=a.oPreviousSearch,f=a.aanFeatures,g='<input type="search" class="'+b.sFilterInput+'"/>',j=d.sSearch,j=j.match(/_INPUT_/)?j.replace("_INPUT_",
|
||||
g):j+g,b=h("<div/>",{id:!f.f?c+"_filter":null,"class":b.sFilter}).append(h("<label/>").append(j)),f=function(){var b=!this.value?"":this.value;b!=e.sSearch&&(ga(a,{sSearch:b,bRegex:e.bRegex,bSmart:e.bSmart,bCaseInsensitive:e.bCaseInsensitive}),a._iDisplayStart=0,P(a))},g=null!==a.searchDelay?a.searchDelay:"ssp"===y(a)?400:0,i=h("input",b).val(e.sSearch).attr("placeholder",d.sSearchPlaceholder).on("keyup.DT search.DT input.DT paste.DT cut.DT",g?Oa(f,g):f).on("keypress.DT",function(a){if(13==a.keyCode)return!1}).attr("aria-controls",
|
||||
c);h(a.nTable).on("search.dt.DT",function(b,c){if(a===c)try{i[0]!==H.activeElement&&i.val(e.sSearch)}catch(d){}});return b[0]}function ga(a,b,c){var d=a.oPreviousSearch,e=a.aoPreSearchCols,f=function(a){d.sSearch=a.sSearch;d.bRegex=a.bRegex;d.bSmart=a.bSmart;d.bCaseInsensitive=a.bCaseInsensitive};Ga(a);if("ssp"!=y(a)){xb(a,b.sSearch,c,b.bEscapeRegex!==k?!b.bEscapeRegex:b.bRegex,b.bSmart,b.bCaseInsensitive);f(b);for(b=0;b<e.length;b++)yb(a,e[b].sSearch,b,e[b].bEscapeRegex!==k?!e[b].bEscapeRegex:e[b].bRegex,
|
||||
e[b].bSmart,e[b].bCaseInsensitive);zb(a)}else f(b);a.bFiltered=!0;r(a,null,"search",[a])}function zb(a){for(var b=n.ext.search,c=a.aiDisplay,d,e,f=0,g=b.length;f<g;f++){for(var j=[],i=0,m=c.length;i<m;i++)e=c[i],d=a.aoData[e],b[f](a,d._aFilterData,e,d._aData,i)&&j.push(e);c.length=0;h.merge(c,j)}}function yb(a,b,c,d,e,f){if(""!==b){for(var g=[],j=a.aiDisplay,d=Pa(b,d,e,f),e=0;e<j.length;e++)b=a.aoData[j[e]]._aFilterData[c],d.test(b)&&g.push(j[e]);a.aiDisplay=g}}function xb(a,b,c,d,e,f){var d=Pa(b,
|
||||
d,e,f),f=a.oPreviousSearch.sSearch,g=a.aiDisplayMaster,j,e=[];0!==n.ext.search.length&&(c=!0);j=Ab(a);if(0>=b.length)a.aiDisplay=g.slice();else{if(j||c||f.length>b.length||0!==b.indexOf(f)||a.bSorted)a.aiDisplay=g.slice();b=a.aiDisplay;for(c=0;c<b.length;c++)d.test(a.aoData[b[c]]._sFilterRow)&&e.push(b[c]);a.aiDisplay=e}}function Pa(a,b,c,d){a=b?a:Qa(a);c&&(a="^(?=.*?"+h.map(a.match(/"[^"]+"|[^ ]+/g)||[""],function(a){if('"'===a.charAt(0))var b=a.match(/^"(.*)"$/),a=b?b[1]:a;return a.replace('"',
|
||||
"")}).join(")(?=.*?")+").*$");return RegExp(a,d?"i":"")}function Ab(a){var b=a.aoColumns,c,d,e,f,g,j,i,h,l=n.ext.type.search;c=!1;d=0;for(f=a.aoData.length;d<f;d++)if(h=a.aoData[d],!h._aFilterData){j=[];e=0;for(g=b.length;e<g;e++)c=b[e],c.bSearchable?(i=B(a,d,e,"filter"),l[c.sType]&&(i=l[c.sType](i)),null===i&&(i=""),"string"!==typeof i&&i.toString&&(i=i.toString())):i="",i.indexOf&&-1!==i.indexOf("&")&&(va.innerHTML=i,i=Wb?va.textContent:va.innerText),i.replace&&(i=i.replace(/[\r\n]/g,"")),j.push(i);
|
||||
h._aFilterData=j;h._sFilterRow=j.join(" ");c=!0}return c}function Bb(a){return{search:a.sSearch,smart:a.bSmart,regex:a.bRegex,caseInsensitive:a.bCaseInsensitive}}function Cb(a){return{sSearch:a.search,bSmart:a.smart,bRegex:a.regex,bCaseInsensitive:a.caseInsensitive}}function tb(a){var b=a.sTableId,c=a.aanFeatures.i,d=h("<div/>",{"class":a.oClasses.sInfo,id:!c?b+"_info":null});c||(a.aoDrawCallback.push({fn:Db,sName:"information"}),d.attr("role","status").attr("aria-live","polite"),h(a.nTable).attr("aria-describedby",
|
||||
b+"_info"));return d[0]}function Db(a){var b=a.aanFeatures.i;if(0!==b.length){var c=a.oLanguage,d=a._iDisplayStart+1,e=a.fnDisplayEnd(),f=a.fnRecordsTotal(),g=a.fnRecordsDisplay(),j=g?c.sInfo:c.sInfoEmpty;g!==f&&(j+=" "+c.sInfoFiltered);j+=c.sInfoPostFix;j=Eb(a,j);c=c.fnInfoCallback;null!==c&&(j=c.call(a.oInstance,a,d,e,f,g,j));h(b).html(j)}}function Eb(a,b){var c=a.fnFormatNumber,d=a._iDisplayStart+1,e=a._iDisplayLength,f=a.fnRecordsDisplay(),g=-1===e;return b.replace(/_START_/g,c.call(a,d)).replace(/_END_/g,
|
||||
c.call(a,a.fnDisplayEnd())).replace(/_MAX_/g,c.call(a,a.fnRecordsTotal())).replace(/_TOTAL_/g,c.call(a,f)).replace(/_PAGE_/g,c.call(a,g?1:Math.ceil(d/e))).replace(/_PAGES_/g,c.call(a,g?1:Math.ceil(f/e)))}function ha(a){var b,c,d=a.iInitDisplayStart,e=a.aoColumns,f;c=a.oFeatures;var g=a.bDeferLoading;if(a.bInitialised){ob(a);lb(a);fa(a,a.aoHeader);fa(a,a.aoFooter);C(a,!0);c.bAutoWidth&&Fa(a);b=0;for(c=e.length;b<c;b++)f=e[b],f.sWidth&&(f.nTh.style.width=v(f.sWidth));r(a,null,"preInit",[a]);T(a);e=
|
||||
y(a);if("ssp"!=e||g)"ajax"==e?sa(a,[],function(c){var f=ta(a,c);for(b=0;b<f.length;b++)O(a,f[b]);a.iInitDisplayStart=d;T(a);C(a,!1);ua(a,c)},a):(C(a,!1),ua(a))}else setTimeout(function(){ha(a)},200)}function ua(a,b){a._bInitComplete=!0;(b||a.oInit.aaData)&&$(a);r(a,null,"plugin-init",[a,b]);r(a,"aoInitComplete","init",[a,b])}function Ra(a,b){var c=parseInt(b,10);a._iDisplayLength=c;Sa(a);r(a,null,"length",[a,c])}function pb(a){for(var b=a.oClasses,c=a.sTableId,d=a.aLengthMenu,e=h.isArray(d[0]),f=
|
||||
e?d[0]:d,d=e?d[1]:d,e=h("<select/>",{name:c+"_length","aria-controls":c,"class":b.sLengthSelect}),g=0,j=f.length;g<j;g++)e[0][g]=new Option("number"===typeof d[g]?a.fnFormatNumber(d[g]):d[g],f[g]);var i=h("<div><label/></div>").addClass(b.sLength);a.aanFeatures.l||(i[0].id=c+"_length");i.children().append(a.oLanguage.sLengthMenu.replace("_MENU_",e[0].outerHTML));h("select",i).val(a._iDisplayLength).on("change.DT",function(){Ra(a,h(this).val());P(a)});h(a.nTable).on("length.dt.DT",function(b,c,d){a===
|
||||
c&&h("select",i).val(d)});return i[0]}function ub(a){var b=a.sPaginationType,c=n.ext.pager[b],d="function"===typeof c,e=function(a){P(a)},b=h("<div/>").addClass(a.oClasses.sPaging+b)[0],f=a.aanFeatures;d||c.fnInit(a,b,e);f.p||(b.id=a.sTableId+"_paginate",a.aoDrawCallback.push({fn:function(a){if(d){var b=a._iDisplayStart,i=a._iDisplayLength,h=a.fnRecordsDisplay(),l=-1===i,b=l?0:Math.ceil(b/i),i=l?1:Math.ceil(h/i),h=c(b,i),k,l=0;for(k=f.p.length;l<k;l++)Na(a,"pageButton")(a,f.p[l],l,h,b,i)}else c.fnUpdate(a,
|
||||
e)},sName:"pagination"}));return b}function Ta(a,b,c){var d=a._iDisplayStart,e=a._iDisplayLength,f=a.fnRecordsDisplay();0===f||-1===e?d=0:"number"===typeof b?(d=b*e,d>f&&(d=0)):"first"==b?d=0:"previous"==b?(d=0<=e?d-e:0,0>d&&(d=0)):"next"==b?d+e<f&&(d+=e):"last"==b?d=Math.floor((f-1)/e)*e:K(a,0,"Unknown paging action: "+b,5);b=a._iDisplayStart!==d;a._iDisplayStart=d;b&&(r(a,null,"page",[a]),c&&P(a));return b}function rb(a){return h("<div/>",{id:!a.aanFeatures.r?a.sTableId+"_processing":null,"class":a.oClasses.sProcessing}).html(a.oLanguage.sProcessing).insertBefore(a.nTable)[0]}
|
||||
function C(a,b){a.oFeatures.bProcessing&&h(a.aanFeatures.r).css("display",b?"block":"none");r(a,null,"processing",[a,b])}function sb(a){var b=h(a.nTable);b.attr("role","grid");var c=a.oScroll;if(""===c.sX&&""===c.sY)return a.nTable;var d=c.sX,e=c.sY,f=a.oClasses,g=b.children("caption"),j=g.length?g[0]._captionSide:null,i=h(b[0].cloneNode(!1)),m=h(b[0].cloneNode(!1)),l=b.children("tfoot");l.length||(l=null);i=h("<div/>",{"class":f.sScrollWrapper}).append(h("<div/>",{"class":f.sScrollHead}).css({overflow:"hidden",
|
||||
position:"relative",border:0,width:d?!d?null:v(d):"100%"}).append(h("<div/>",{"class":f.sScrollHeadInner}).css({"box-sizing":"content-box",width:c.sXInner||"100%"}).append(i.removeAttr("id").css("margin-left",0).append("top"===j?g:null).append(b.children("thead"))))).append(h("<div/>",{"class":f.sScrollBody}).css({position:"relative",overflow:"auto",width:!d?null:v(d)}).append(b));l&&i.append(h("<div/>",{"class":f.sScrollFoot}).css({overflow:"hidden",border:0,width:d?!d?null:v(d):"100%"}).append(h("<div/>",
|
||||
{"class":f.sScrollFootInner}).append(m.removeAttr("id").css("margin-left",0).append("bottom"===j?g:null).append(b.children("tfoot")))));var b=i.children(),k=b[0],f=b[1],t=l?b[2]:null;if(d)h(f).on("scroll.DT",function(){var a=this.scrollLeft;k.scrollLeft=a;l&&(t.scrollLeft=a)});h(f).css(e&&c.bCollapse?"max-height":"height",e);a.nScrollHead=k;a.nScrollBody=f;a.nScrollFoot=t;a.aoDrawCallback.push({fn:la,sName:"scrolling"});return i[0]}function la(a){var b=a.oScroll,c=b.sX,d=b.sXInner,e=b.sY,b=b.iBarWidth,
|
||||
f=h(a.nScrollHead),g=f[0].style,j=f.children("div"),i=j[0].style,m=j.children("table"),j=a.nScrollBody,l=h(j),q=j.style,t=h(a.nScrollFoot).children("div"),n=t.children("table"),o=h(a.nTHead),p=h(a.nTable),s=p[0],r=s.style,u=a.nTFoot?h(a.nTFoot):null,x=a.oBrowser,U=x.bScrollOversize,Xb=D(a.aoColumns,"nTh"),Q,L,R,w,Ua=[],y=[],z=[],A=[],B,C=function(a){a=a.style;a.paddingTop="0";a.paddingBottom="0";a.borderTopWidth="0";a.borderBottomWidth="0";a.height=0};L=j.scrollHeight>j.clientHeight;if(a.scrollBarVis!==
|
||||
L&&a.scrollBarVis!==k)a.scrollBarVis=L,$(a);else{a.scrollBarVis=L;p.children("thead, tfoot").remove();u&&(R=u.clone().prependTo(p),Q=u.find("tr"),R=R.find("tr"));w=o.clone().prependTo(p);o=o.find("tr");L=w.find("tr");w.find("th, td").removeAttr("tabindex");c||(q.width="100%",f[0].style.width="100%");h.each(ra(a,w),function(b,c){B=aa(a,b);c.style.width=a.aoColumns[B].sWidth});u&&I(function(a){a.style.width=""},R);f=p.outerWidth();if(""===c){r.width="100%";if(U&&(p.find("tbody").height()>j.offsetHeight||
|
||||
"scroll"==l.css("overflow-y")))r.width=v(p.outerWidth()-b);f=p.outerWidth()}else""!==d&&(r.width=v(d),f=p.outerWidth());I(C,L);I(function(a){z.push(a.innerHTML);Ua.push(v(h(a).css("width")))},L);I(function(a,b){if(h.inArray(a,Xb)!==-1)a.style.width=Ua[b]},o);h(L).height(0);u&&(I(C,R),I(function(a){A.push(a.innerHTML);y.push(v(h(a).css("width")))},R),I(function(a,b){a.style.width=y[b]},Q),h(R).height(0));I(function(a,b){a.innerHTML='<div class="dataTables_sizing">'+z[b]+"</div>";a.childNodes[0].style.height=
|
||||
"0";a.childNodes[0].style.overflow="hidden";a.style.width=Ua[b]},L);u&&I(function(a,b){a.innerHTML='<div class="dataTables_sizing">'+A[b]+"</div>";a.childNodes[0].style.height="0";a.childNodes[0].style.overflow="hidden";a.style.width=y[b]},R);if(p.outerWidth()<f){Q=j.scrollHeight>j.offsetHeight||"scroll"==l.css("overflow-y")?f+b:f;if(U&&(j.scrollHeight>j.offsetHeight||"scroll"==l.css("overflow-y")))r.width=v(Q-b);(""===c||""!==d)&&K(a,1,"Possible column misalignment",6)}else Q="100%";q.width=v(Q);
|
||||
g.width=v(Q);u&&(a.nScrollFoot.style.width=v(Q));!e&&U&&(q.height=v(s.offsetHeight+b));c=p.outerWidth();m[0].style.width=v(c);i.width=v(c);d=p.height()>j.clientHeight||"scroll"==l.css("overflow-y");e="padding"+(x.bScrollbarLeft?"Left":"Right");i[e]=d?b+"px":"0px";u&&(n[0].style.width=v(c),t[0].style.width=v(c),t[0].style[e]=d?b+"px":"0px");p.children("colgroup").insertBefore(p.children("thead"));l.scroll();if((a.bSorted||a.bFiltered)&&!a._drawHold)j.scrollTop=0}}function I(a,b,c){for(var d=0,e=0,
|
||||
f=b.length,g,j;e<f;){g=b[e].firstChild;for(j=c?c[e].firstChild:null;g;)1===g.nodeType&&(c?a(g,j,d):a(g,d),d++),g=g.nextSibling,j=c?j.nextSibling:null;e++}}function Fa(a){var b=a.nTable,c=a.aoColumns,d=a.oScroll,e=d.sY,f=d.sX,g=d.sXInner,j=c.length,i=ma(a,"bVisible"),m=h("th",a.nTHead),l=b.getAttribute("width"),k=b.parentNode,t=!1,n,o,p=a.oBrowser,d=p.bScrollOversize;(n=b.style.width)&&-1!==n.indexOf("%")&&(l=n);for(n=0;n<i.length;n++)o=c[i[n]],null!==o.sWidth&&(o.sWidth=Fb(o.sWidthOrig,k),t=!0);if(d||
|
||||
!t&&!f&&!e&&j==V(a)&&j==m.length)for(n=0;n<j;n++)i=aa(a,n),null!==i&&(c[i].sWidth=v(m.eq(n).width()));else{j=h(b).clone().css("visibility","hidden").removeAttr("id");j.find("tbody tr").remove();var s=h("<tr/>").appendTo(j.find("tbody"));j.find("thead, tfoot").remove();j.append(h(a.nTHead).clone()).append(h(a.nTFoot).clone());j.find("tfoot th, tfoot td").css("width","");m=ra(a,j.find("thead")[0]);for(n=0;n<i.length;n++)o=c[i[n]],m[n].style.width=null!==o.sWidthOrig&&""!==o.sWidthOrig?v(o.sWidthOrig):
|
||||
"",o.sWidthOrig&&f&&h(m[n]).append(h("<div/>").css({width:o.sWidthOrig,margin:0,padding:0,border:0,height:1}));if(a.aoData.length)for(n=0;n<i.length;n++)t=i[n],o=c[t],h(Gb(a,t)).clone(!1).append(o.sContentPadding).appendTo(s);h("[name]",j).removeAttr("name");o=h("<div/>").css(f||e?{position:"absolute",top:0,left:0,height:1,right:0,overflow:"hidden"}:{}).append(j).appendTo(k);f&&g?j.width(g):f?(j.css("width","auto"),j.removeAttr("width"),j.width()<k.clientWidth&&l&&j.width(k.clientWidth)):e?j.width(k.clientWidth):
|
||||
l&&j.width(l);for(n=e=0;n<i.length;n++)k=h(m[n]),g=k.outerWidth()-k.width(),k=p.bBounding?Math.ceil(m[n].getBoundingClientRect().width):k.outerWidth(),e+=k,c[i[n]].sWidth=v(k-g);b.style.width=v(e);o.remove()}l&&(b.style.width=v(l));if((l||f)&&!a._reszEvt)b=function(){h(E).on("resize.DT-"+a.sInstance,Oa(function(){$(a)}))},d?setTimeout(b,1E3):b(),a._reszEvt=!0}function Fb(a,b){if(!a)return 0;var c=h("<div/>").css("width",v(a)).appendTo(b||H.body),d=c[0].offsetWidth;c.remove();return d}function Gb(a,
|
||||
b){var c=Hb(a,b);if(0>c)return null;var d=a.aoData[c];return!d.nTr?h("<td/>").html(B(a,c,b,"display"))[0]:d.anCells[b]}function Hb(a,b){for(var c,d=-1,e=-1,f=0,g=a.aoData.length;f<g;f++)c=B(a,f,b,"display")+"",c=c.replace(Yb,""),c=c.replace(/ /g," "),c.length>d&&(d=c.length,e=f);return e}function v(a){return null===a?"0px":"number"==typeof a?0>a?"0px":a+"px":a.match(/\d$/)?a+"px":a}function X(a){var b,c,d=[],e=a.aoColumns,f,g,j,i;b=a.aaSortingFixed;c=h.isPlainObject(b);var m=[];f=function(a){a.length&&
|
||||
!h.isArray(a[0])?m.push(a):h.merge(m,a)};h.isArray(b)&&f(b);c&&b.pre&&f(b.pre);f(a.aaSorting);c&&b.post&&f(b.post);for(a=0;a<m.length;a++){i=m[a][0];f=e[i].aDataSort;b=0;for(c=f.length;b<c;b++)g=f[b],j=e[g].sType||"string",m[a]._idx===k&&(m[a]._idx=h.inArray(m[a][1],e[g].asSorting)),d.push({src:i,col:g,dir:m[a][1],index:m[a]._idx,type:j,formatter:n.ext.type.order[j+"-pre"]})}return d}function nb(a){var b,c,d=[],e=n.ext.type.order,f=a.aoData,g=0,j,i=a.aiDisplayMaster,h;Ga(a);h=X(a);b=0;for(c=h.length;b<
|
||||
c;b++)j=h[b],j.formatter&&g++,Ib(a,j.col);if("ssp"!=y(a)&&0!==h.length){b=0;for(c=i.length;b<c;b++)d[i[b]]=b;g===h.length?i.sort(function(a,b){var c,e,g,j,i=h.length,k=f[a]._aSortData,n=f[b]._aSortData;for(g=0;g<i;g++)if(j=h[g],c=k[j.col],e=n[j.col],c=c<e?-1:c>e?1:0,0!==c)return"asc"===j.dir?c:-c;c=d[a];e=d[b];return c<e?-1:c>e?1:0}):i.sort(function(a,b){var c,g,j,i,k=h.length,n=f[a]._aSortData,o=f[b]._aSortData;for(j=0;j<k;j++)if(i=h[j],c=n[i.col],g=o[i.col],i=e[i.type+"-"+i.dir]||e["string-"+i.dir],
|
||||
c=i(c,g),0!==c)return c;c=d[a];g=d[b];return c<g?-1:c>g?1:0})}a.bSorted=!0}function Jb(a){for(var b,c,d=a.aoColumns,e=X(a),a=a.oLanguage.oAria,f=0,g=d.length;f<g;f++){c=d[f];var j=c.asSorting;b=c.sTitle.replace(/<.*?>/g,"");var i=c.nTh;i.removeAttribute("aria-sort");c.bSortable&&(0<e.length&&e[0].col==f?(i.setAttribute("aria-sort","asc"==e[0].dir?"ascending":"descending"),c=j[e[0].index+1]||j[0]):c=j[0],b+="asc"===c?a.sSortAscending:a.sSortDescending);i.setAttribute("aria-label",b)}}function Va(a,
|
||||
b,c,d){var e=a.aaSorting,f=a.aoColumns[b].asSorting,g=function(a,b){var c=a._idx;c===k&&(c=h.inArray(a[1],f));return c+1<f.length?c+1:b?null:0};"number"===typeof e[0]&&(e=a.aaSorting=[e]);c&&a.oFeatures.bSortMulti?(c=h.inArray(b,D(e,"0")),-1!==c?(b=g(e[c],!0),null===b&&1===e.length&&(b=0),null===b?e.splice(c,1):(e[c][1]=f[b],e[c]._idx=b)):(e.push([b,f[0],0]),e[e.length-1]._idx=0)):e.length&&e[0][0]==b?(b=g(e[0]),e.length=1,e[0][1]=f[b],e[0]._idx=b):(e.length=0,e.push([b,f[0]]),e[0]._idx=0);T(a);"function"==
|
||||
typeof d&&d(a)}function Ma(a,b,c,d){var e=a.aoColumns[c];Wa(b,{},function(b){!1!==e.bSortable&&(a.oFeatures.bProcessing?(C(a,!0),setTimeout(function(){Va(a,c,b.shiftKey,d);"ssp"!==y(a)&&C(a,!1)},0)):Va(a,c,b.shiftKey,d))})}function wa(a){var b=a.aLastSort,c=a.oClasses.sSortColumn,d=X(a),e=a.oFeatures,f,g;if(e.bSort&&e.bSortClasses){e=0;for(f=b.length;e<f;e++)g=b[e].src,h(D(a.aoData,"anCells",g)).removeClass(c+(2>e?e+1:3));e=0;for(f=d.length;e<f;e++)g=d[e].src,h(D(a.aoData,"anCells",g)).addClass(c+
|
||||
(2>e?e+1:3))}a.aLastSort=d}function Ib(a,b){var c=a.aoColumns[b],d=n.ext.order[c.sSortDataType],e;d&&(e=d.call(a.oInstance,a,b,ba(a,b)));for(var f,g=n.ext.type.order[c.sType+"-pre"],j=0,i=a.aoData.length;j<i;j++)if(c=a.aoData[j],c._aSortData||(c._aSortData=[]),!c._aSortData[b]||d)f=d?e[j]:B(a,j,b,"sort"),c._aSortData[b]=g?g(f):f}function xa(a){if(a.oFeatures.bStateSave&&!a.bDestroying){var b={time:+new Date,start:a._iDisplayStart,length:a._iDisplayLength,order:h.extend(!0,[],a.aaSorting),search:Bb(a.oPreviousSearch),
|
||||
columns:h.map(a.aoColumns,function(b,d){return{visible:b.bVisible,search:Bb(a.aoPreSearchCols[d])}})};r(a,"aoStateSaveParams","stateSaveParams",[a,b]);a.oSavedState=b;a.fnStateSaveCallback.call(a.oInstance,a,b)}}function Kb(a,b,c){var d,e,f=a.aoColumns,b=function(b){if(b&&b.time){var g=r(a,"aoStateLoadParams","stateLoadParams",[a,b]);if(-1===h.inArray(!1,g)&&(g=a.iStateDuration,!(0<g&&b.time<+new Date-1E3*g)&&!(b.columns&&f.length!==b.columns.length))){a.oLoadedState=h.extend(!0,{},b);b.start!==k&&
|
||||
(a._iDisplayStart=b.start,a.iInitDisplayStart=b.start);b.length!==k&&(a._iDisplayLength=b.length);b.order!==k&&(a.aaSorting=[],h.each(b.order,function(b,c){a.aaSorting.push(c[0]>=f.length?[0,c[1]]:c)}));b.search!==k&&h.extend(a.oPreviousSearch,Cb(b.search));if(b.columns){d=0;for(e=b.columns.length;d<e;d++)g=b.columns[d],g.visible!==k&&(f[d].bVisible=g.visible),g.search!==k&&h.extend(a.aoPreSearchCols[d],Cb(g.search))}r(a,"aoStateLoaded","stateLoaded",[a,b])}}c()};if(a.oFeatures.bStateSave){var g=
|
||||
a.fnStateLoadCallback.call(a.oInstance,a,b);g!==k&&b(g)}else c()}function ya(a){var b=n.settings,a=h.inArray(a,D(b,"nTable"));return-1!==a?b[a]:null}function K(a,b,c,d){c="DataTables warning: "+(a?"table id="+a.sTableId+" - ":"")+c;d&&(c+=". For more information about this error, please see http://datatables.net/tn/"+d);if(b)E.console&&console.log&&console.log(c);else if(b=n.ext,b=b.sErrMode||b.errMode,a&&r(a,null,"error",[a,d,c]),"alert"==b)alert(c);else{if("throw"==b)throw Error(c);"function"==
|
||||
typeof b&&b(a,d,c)}}function F(a,b,c,d){h.isArray(c)?h.each(c,function(c,d){h.isArray(d)?F(a,b,d[0],d[1]):F(a,b,d)}):(d===k&&(d=c),b[c]!==k&&(a[d]=b[c]))}function Xa(a,b,c){var d,e;for(e in b)b.hasOwnProperty(e)&&(d=b[e],h.isPlainObject(d)?(h.isPlainObject(a[e])||(a[e]={}),h.extend(!0,a[e],d)):a[e]=c&&"data"!==e&&"aaData"!==e&&h.isArray(d)?d.slice():d);return a}function Wa(a,b,c){h(a).on("click.DT",b,function(b){h(a).blur();c(b)}).on("keypress.DT",b,function(a){13===a.which&&(a.preventDefault(),c(a))}).on("selectstart.DT",
|
||||
function(){return!1})}function z(a,b,c,d){c&&a[b].push({fn:c,sName:d})}function r(a,b,c,d){var e=[];b&&(e=h.map(a[b].slice().reverse(),function(b){return b.fn.apply(a.oInstance,d)}));null!==c&&(b=h.Event(c+".dt"),h(a.nTable).trigger(b,d),e.push(b.result));return e}function Sa(a){var b=a._iDisplayStart,c=a.fnDisplayEnd(),d=a._iDisplayLength;b>=c&&(b=c-d);b-=b%d;if(-1===d||0>b)b=0;a._iDisplayStart=b}function Na(a,b){var c=a.renderer,d=n.ext.renderer[b];return h.isPlainObject(c)&&c[b]?d[c[b]]||d._:"string"===
|
||||
typeof c?d[c]||d._:d._}function y(a){return a.oFeatures.bServerSide?"ssp":a.ajax||a.sAjaxSource?"ajax":"dom"}function ia(a,b){var c=[],c=Lb.numbers_length,d=Math.floor(c/2);b<=c?c=Y(0,b):a<=d?(c=Y(0,c-2),c.push("ellipsis"),c.push(b-1)):(a>=b-1-d?c=Y(b-(c-2),b):(c=Y(a-d+2,a+d-1),c.push("ellipsis"),c.push(b-1)),c.splice(0,0,"ellipsis"),c.splice(0,0,0));c.DT_el="span";return c}function Da(a){h.each({num:function(b){return za(b,a)},"num-fmt":function(b){return za(b,a,Ya)},"html-num":function(b){return za(b,
|
||||
a,Aa)},"html-num-fmt":function(b){return za(b,a,Aa,Ya)}},function(b,c){x.type.order[b+a+"-pre"]=c;b.match(/^html\-/)&&(x.type.search[b+a]=x.type.search.html)})}function Mb(a){return function(){var b=[ya(this[n.ext.iApiIndex])].concat(Array.prototype.slice.call(arguments));return n.ext.internal[a].apply(this,b)}}var n=function(a){this.$=function(a,b){return this.api(!0).$(a,b)};this._=function(a,b){return this.api(!0).rows(a,b).data()};this.api=function(a){return a?new s(ya(this[x.iApiIndex])):new s(this)};
|
||||
this.fnAddData=function(a,b){var c=this.api(!0),d=h.isArray(a)&&(h.isArray(a[0])||h.isPlainObject(a[0]))?c.rows.add(a):c.row.add(a);(b===k||b)&&c.draw();return d.flatten().toArray()};this.fnAdjustColumnSizing=function(a){var b=this.api(!0).columns.adjust(),c=b.settings()[0],d=c.oScroll;a===k||a?b.draw(!1):(""!==d.sX||""!==d.sY)&&la(c)};this.fnClearTable=function(a){var b=this.api(!0).clear();(a===k||a)&&b.draw()};this.fnClose=function(a){this.api(!0).row(a).child.hide()};this.fnDeleteRow=function(a,
|
||||
b,c){var d=this.api(!0),a=d.rows(a),e=a.settings()[0],h=e.aoData[a[0][0]];a.remove();b&&b.call(this,e,h);(c===k||c)&&d.draw();return h};this.fnDestroy=function(a){this.api(!0).destroy(a)};this.fnDraw=function(a){this.api(!0).draw(a)};this.fnFilter=function(a,b,c,d,e,h){e=this.api(!0);null===b||b===k?e.search(a,c,d,h):e.column(b).search(a,c,d,h);e.draw()};this.fnGetData=function(a,b){var c=this.api(!0);if(a!==k){var d=a.nodeName?a.nodeName.toLowerCase():"";return b!==k||"td"==d||"th"==d?c.cell(a,b).data():
|
||||
c.row(a).data()||null}return c.data().toArray()};this.fnGetNodes=function(a){var b=this.api(!0);return a!==k?b.row(a).node():b.rows().nodes().flatten().toArray()};this.fnGetPosition=function(a){var b=this.api(!0),c=a.nodeName.toUpperCase();return"TR"==c?b.row(a).index():"TD"==c||"TH"==c?(a=b.cell(a).index(),[a.row,a.columnVisible,a.column]):null};this.fnIsOpen=function(a){return this.api(!0).row(a).child.isShown()};this.fnOpen=function(a,b,c){return this.api(!0).row(a).child(b,c).show().child()[0]};
|
||||
this.fnPageChange=function(a,b){var c=this.api(!0).page(a);(b===k||b)&&c.draw(!1)};this.fnSetColumnVis=function(a,b,c){a=this.api(!0).column(a).visible(b);(c===k||c)&&a.columns.adjust().draw()};this.fnSettings=function(){return ya(this[x.iApiIndex])};this.fnSort=function(a){this.api(!0).order(a).draw()};this.fnSortListener=function(a,b,c){this.api(!0).order.listener(a,b,c)};this.fnUpdate=function(a,b,c,d,e){var h=this.api(!0);c===k||null===c?h.row(b).data(a):h.cell(b,c).data(a);(e===k||e)&&h.columns.adjust();
|
||||
(d===k||d)&&h.draw();return 0};this.fnVersionCheck=x.fnVersionCheck;var b=this,c=a===k,d=this.length;c&&(a={});this.oApi=this.internal=x.internal;for(var e in n.ext.internal)e&&(this[e]=Mb(e));this.each(function(){var e={},g=1<d?Xa(e,a,!0):a,j=0,i,e=this.getAttribute("id"),m=!1,l=n.defaults,q=h(this);if("table"!=this.nodeName.toLowerCase())K(null,0,"Non-table node initialisation ("+this.nodeName+")",2);else{fb(l);gb(l.column);J(l,l,!0);J(l.column,l.column,!0);J(l,h.extend(g,q.data()));var t=n.settings,
|
||||
j=0;for(i=t.length;j<i;j++){var o=t[j];if(o.nTable==this||o.nTHead&&o.nTHead.parentNode==this||o.nTFoot&&o.nTFoot.parentNode==this){var s=g.bRetrieve!==k?g.bRetrieve:l.bRetrieve;if(c||s)return o.oInstance;if(g.bDestroy!==k?g.bDestroy:l.bDestroy){o.oInstance.fnDestroy();break}else{K(o,0,"Cannot reinitialise DataTable",3);return}}if(o.sTableId==this.id){t.splice(j,1);break}}if(null===e||""===e)this.id=e="DataTables_Table_"+n.ext._unique++;var p=h.extend(!0,{},n.models.oSettings,{sDestroyWidth:q[0].style.width,
|
||||
sInstance:e,sTableId:e});p.nTable=this;p.oApi=b.internal;p.oInit=g;t.push(p);p.oInstance=1===b.length?b:q.dataTable();fb(g);Ca(g.oLanguage);g.aLengthMenu&&!g.iDisplayLength&&(g.iDisplayLength=h.isArray(g.aLengthMenu[0])?g.aLengthMenu[0][0]:g.aLengthMenu[0]);g=Xa(h.extend(!0,{},l),g);F(p.oFeatures,g,"bPaginate bLengthChange bFilter bSort bSortMulti bInfo bProcessing bAutoWidth bSortClasses bServerSide bDeferRender".split(" "));F(p,g,["asStripeClasses","ajax","fnServerData","fnFormatNumber","sServerMethod",
|
||||
"aaSorting","aaSortingFixed","aLengthMenu","sPaginationType","sAjaxSource","sAjaxDataProp","iStateDuration","sDom","bSortCellsTop","iTabIndex","fnStateLoadCallback","fnStateSaveCallback","renderer","searchDelay","rowId",["iCookieDuration","iStateDuration"],["oSearch","oPreviousSearch"],["aoSearchCols","aoPreSearchCols"],["iDisplayLength","_iDisplayLength"]]);F(p.oScroll,g,[["sScrollX","sX"],["sScrollXInner","sXInner"],["sScrollY","sY"],["bScrollCollapse","bCollapse"]]);F(p.oLanguage,g,"fnInfoCallback");
|
||||
z(p,"aoDrawCallback",g.fnDrawCallback,"user");z(p,"aoServerParams",g.fnServerParams,"user");z(p,"aoStateSaveParams",g.fnStateSaveParams,"user");z(p,"aoStateLoadParams",g.fnStateLoadParams,"user");z(p,"aoStateLoaded",g.fnStateLoaded,"user");z(p,"aoRowCallback",g.fnRowCallback,"user");z(p,"aoRowCreatedCallback",g.fnCreatedRow,"user");z(p,"aoHeaderCallback",g.fnHeaderCallback,"user");z(p,"aoFooterCallback",g.fnFooterCallback,"user");z(p,"aoInitComplete",g.fnInitComplete,"user");z(p,"aoPreDrawCallback",
|
||||
g.fnPreDrawCallback,"user");p.rowIdFn=S(g.rowId);hb(p);var u=p.oClasses;h.extend(u,n.ext.classes,g.oClasses);q.addClass(u.sTable);p.iInitDisplayStart===k&&(p.iInitDisplayStart=g.iDisplayStart,p._iDisplayStart=g.iDisplayStart);null!==g.iDeferLoading&&(p.bDeferLoading=!0,e=h.isArray(g.iDeferLoading),p._iRecordsDisplay=e?g.iDeferLoading[0]:g.iDeferLoading,p._iRecordsTotal=e?g.iDeferLoading[1]:g.iDeferLoading);var v=p.oLanguage;h.extend(!0,v,g.oLanguage);v.sUrl&&(h.ajax({dataType:"json",url:v.sUrl,success:function(a){Ca(a);
|
||||
J(l.oLanguage,a);h.extend(true,v,a);ha(p)},error:function(){ha(p)}}),m=!0);null===g.asStripeClasses&&(p.asStripeClasses=[u.sStripeOdd,u.sStripeEven]);var e=p.asStripeClasses,x=q.children("tbody").find("tr").eq(0);-1!==h.inArray(!0,h.map(e,function(a){return x.hasClass(a)}))&&(h("tbody tr",this).removeClass(e.join(" ")),p.asDestroyStripes=e.slice());e=[];t=this.getElementsByTagName("thead");0!==t.length&&(ea(p.aoHeader,t[0]),e=ra(p));if(null===g.aoColumns){t=[];j=0;for(i=e.length;j<i;j++)t.push(null)}else t=
|
||||
g.aoColumns;j=0;for(i=t.length;j<i;j++)Ea(p,e?e[j]:null);jb(p,g.aoColumnDefs,t,function(a,b){ka(p,a,b)});if(x.length){var w=function(a,b){return a.getAttribute("data-"+b)!==null?b:null};h(x[0]).children("th, td").each(function(a,b){var c=p.aoColumns[a];if(c.mData===a){var d=w(b,"sort")||w(b,"order"),e=w(b,"filter")||w(b,"search");if(d!==null||e!==null){c.mData={_:a+".display",sort:d!==null?a+".@data-"+d:k,type:d!==null?a+".@data-"+d:k,filter:e!==null?a+".@data-"+e:k};ka(p,a)}}})}var U=p.oFeatures,
|
||||
e=function(){if(g.aaSorting===k){var a=p.aaSorting;j=0;for(i=a.length;j<i;j++)a[j][1]=p.aoColumns[j].asSorting[0]}wa(p);U.bSort&&z(p,"aoDrawCallback",function(){if(p.bSorted){var a=X(p),b={};h.each(a,function(a,c){b[c.src]=c.dir});r(p,null,"order",[p,a,b]);Jb(p)}});z(p,"aoDrawCallback",function(){(p.bSorted||y(p)==="ssp"||U.bDeferRender)&&wa(p)},"sc");var a=q.children("caption").each(function(){this._captionSide=h(this).css("caption-side")}),b=q.children("thead");b.length===0&&(b=h("<thead/>").appendTo(q));
|
||||
p.nTHead=b[0];b=q.children("tbody");b.length===0&&(b=h("<tbody/>").appendTo(q));p.nTBody=b[0];b=q.children("tfoot");if(b.length===0&&a.length>0&&(p.oScroll.sX!==""||p.oScroll.sY!==""))b=h("<tfoot/>").appendTo(q);if(b.length===0||b.children().length===0)q.addClass(u.sNoFooter);else if(b.length>0){p.nTFoot=b[0];ea(p.aoFooter,p.nTFoot)}if(g.aaData)for(j=0;j<g.aaData.length;j++)O(p,g.aaData[j]);else(p.bDeferLoading||y(p)=="dom")&&na(p,h(p.nTBody).children("tr"));p.aiDisplay=p.aiDisplayMaster.slice();
|
||||
p.bInitialised=true;m===false&&ha(p)};g.bStateSave?(U.bStateSave=!0,z(p,"aoDrawCallback",xa,"state_save"),Kb(p,g,e)):e()}});b=null;return this},x,s,o,u,Za={},Nb=/[\r\n]/g,Aa=/<.*?>/g,Zb=/^\d{2,4}[\.\/\-]\d{1,2}[\.\/\-]\d{1,2}([T ]{1}\d{1,2}[:\.]\d{2}([\.:]\d{2})?)?$/,$b=RegExp("(\\/|\\.|\\*|\\+|\\?|\\||\\(|\\)|\\[|\\]|\\{|\\}|\\\\|\\$|\\^|\\-)","g"),Ya=/[',$£€¥%\u2009\u202F\u20BD\u20a9\u20BArfkɃΞ]/gi,M=function(a){return!a||!0===a||"-"===a?!0:!1},Ob=function(a){var b=parseInt(a,10);return!isNaN(b)&&
|
||||
isFinite(a)?b:null},Pb=function(a,b){Za[b]||(Za[b]=RegExp(Qa(b),"g"));return"string"===typeof a&&"."!==b?a.replace(/\./g,"").replace(Za[b],"."):a},$a=function(a,b,c){var d="string"===typeof a;if(M(a))return!0;b&&d&&(a=Pb(a,b));c&&d&&(a=a.replace(Ya,""));return!isNaN(parseFloat(a))&&isFinite(a)},Qb=function(a,b,c){return M(a)?!0:!(M(a)||"string"===typeof a)?null:$a(a.replace(Aa,""),b,c)?!0:null},D=function(a,b,c){var d=[],e=0,f=a.length;if(c!==k)for(;e<f;e++)a[e]&&a[e][b]&&d.push(a[e][b][c]);else for(;e<
|
||||
f;e++)a[e]&&d.push(a[e][b]);return d},ja=function(a,b,c,d){var e=[],f=0,g=b.length;if(d!==k)for(;f<g;f++)a[b[f]][c]&&e.push(a[b[f]][c][d]);else for(;f<g;f++)e.push(a[b[f]][c]);return e},Y=function(a,b){var c=[],d;b===k?(b=0,d=a):(d=b,b=a);for(var e=b;e<d;e++)c.push(e);return c},Rb=function(a){for(var b=[],c=0,d=a.length;c<d;c++)a[c]&&b.push(a[c]);return b},qa=function(a){var b;a:{if(!(2>a.length)){b=a.slice().sort();for(var c=b[0],d=1,e=b.length;d<e;d++){if(b[d]===c){b=!1;break a}c=b[d]}}b=!0}if(b)return a.slice();
|
||||
b=[];var e=a.length,f,g=0,d=0;a:for(;d<e;d++){c=a[d];for(f=0;f<g;f++)if(b[f]===c)continue a;b.push(c);g++}return b};n.util={throttle:function(a,b){var c=b!==k?b:200,d,e;return function(){var b=this,g=+new Date,j=arguments;d&&g<d+c?(clearTimeout(e),e=setTimeout(function(){d=k;a.apply(b,j)},c)):(d=g,a.apply(b,j))}},escapeRegex:function(a){return a.replace($b,"\\$1")}};var A=function(a,b,c){a[b]!==k&&(a[c]=a[b])},ca=/\[.*?\]$/,W=/\(\)$/,Qa=n.util.escapeRegex,va=h("<div>")[0],Wb=va.textContent!==k,Yb=
|
||||
/<.*?>/g,Oa=n.util.throttle,Sb=[],w=Array.prototype,ac=function(a){var b,c,d=n.settings,e=h.map(d,function(a){return a.nTable});if(a){if(a.nTable&&a.oApi)return[a];if(a.nodeName&&"table"===a.nodeName.toLowerCase())return b=h.inArray(a,e),-1!==b?[d[b]]:null;if(a&&"function"===typeof a.settings)return a.settings().toArray();"string"===typeof a?c=h(a):a instanceof h&&(c=a)}else return[];if(c)return c.map(function(){b=h.inArray(this,e);return-1!==b?d[b]:null}).toArray()};s=function(a,b){if(!(this instanceof
|
||||
s))return new s(a,b);var c=[],d=function(a){(a=ac(a))&&(c=c.concat(a))};if(h.isArray(a))for(var e=0,f=a.length;e<f;e++)d(a[e]);else d(a);this.context=qa(c);b&&h.merge(this,b);this.selector={rows:null,cols:null,opts:null};s.extend(this,this,Sb)};n.Api=s;h.extend(s.prototype,{any:function(){return 0!==this.count()},concat:w.concat,context:[],count:function(){return this.flatten().length},each:function(a){for(var b=0,c=this.length;b<c;b++)a.call(this,this[b],b,this);return this},eq:function(a){var b=
|
||||
this.context;return b.length>a?new s(b[a],this[a]):null},filter:function(a){var b=[];if(w.filter)b=w.filter.call(this,a,this);else for(var c=0,d=this.length;c<d;c++)a.call(this,this[c],c,this)&&b.push(this[c]);return new s(this.context,b)},flatten:function(){var a=[];return new s(this.context,a.concat.apply(a,this.toArray()))},join:w.join,indexOf:w.indexOf||function(a,b){for(var c=b||0,d=this.length;c<d;c++)if(this[c]===a)return c;return-1},iterator:function(a,b,c,d){var e=[],f,g,j,h,m,l=this.context,
|
||||
n,o,u=this.selector;"string"===typeof a&&(d=c,c=b,b=a,a=!1);g=0;for(j=l.length;g<j;g++){var r=new s(l[g]);if("table"===b)f=c.call(r,l[g],g),f!==k&&e.push(f);else if("columns"===b||"rows"===b)f=c.call(r,l[g],this[g],g),f!==k&&e.push(f);else if("column"===b||"column-rows"===b||"row"===b||"cell"===b){o=this[g];"column-rows"===b&&(n=Ba(l[g],u.opts));h=0;for(m=o.length;h<m;h++)f=o[h],f="cell"===b?c.call(r,l[g],f.row,f.column,g,h):c.call(r,l[g],f,g,h,n),f!==k&&e.push(f)}}return e.length||d?(a=new s(l,a?
|
||||
e.concat.apply([],e):e),b=a.selector,b.rows=u.rows,b.cols=u.cols,b.opts=u.opts,a):this},lastIndexOf:w.lastIndexOf||function(a,b){return this.indexOf.apply(this.toArray.reverse(),arguments)},length:0,map:function(a){var b=[];if(w.map)b=w.map.call(this,a,this);else for(var c=0,d=this.length;c<d;c++)b.push(a.call(this,this[c],c));return new s(this.context,b)},pluck:function(a){return this.map(function(b){return b[a]})},pop:w.pop,push:w.push,reduce:w.reduce||function(a,b){return ib(this,a,b,0,this.length,
|
||||
1)},reduceRight:w.reduceRight||function(a,b){return ib(this,a,b,this.length-1,-1,-1)},reverse:w.reverse,selector:null,shift:w.shift,slice:function(){return new s(this.context,this)},sort:w.sort,splice:w.splice,toArray:function(){return w.slice.call(this)},to$:function(){return h(this)},toJQuery:function(){return h(this)},unique:function(){return new s(this.context,qa(this))},unshift:w.unshift});s.extend=function(a,b,c){if(c.length&&b&&(b instanceof s||b.__dt_wrapper)){var d,e,f,g=function(a,b,c){return function(){var d=
|
||||
b.apply(a,arguments);s.extend(d,d,c.methodExt);return d}};d=0;for(e=c.length;d<e;d++)f=c[d],b[f.name]="function"===typeof f.val?g(a,f.val,f):h.isPlainObject(f.val)?{}:f.val,b[f.name].__dt_wrapper=!0,s.extend(a,b[f.name],f.propExt)}};s.register=o=function(a,b){if(h.isArray(a))for(var c=0,d=a.length;c<d;c++)s.register(a[c],b);else for(var e=a.split("."),f=Sb,g,j,c=0,d=e.length;c<d;c++){g=(j=-1!==e[c].indexOf("()"))?e[c].replace("()",""):e[c];var i;a:{i=0;for(var m=f.length;i<m;i++)if(f[i].name===g){i=
|
||||
f[i];break a}i=null}i||(i={name:g,val:{},methodExt:[],propExt:[]},f.push(i));c===d-1?i.val=b:f=j?i.methodExt:i.propExt}};s.registerPlural=u=function(a,b,c){s.register(a,c);s.register(b,function(){var a=c.apply(this,arguments);return a===this?this:a instanceof s?a.length?h.isArray(a[0])?new s(a.context,a[0]):a[0]:k:a})};o("tables()",function(a){var b;if(a){b=s;var c=this.context;if("number"===typeof a)a=[c[a]];else var d=h.map(c,function(a){return a.nTable}),a=h(d).filter(a).map(function(){var a=h.inArray(this,
|
||||
d);return c[a]}).toArray();b=new b(a)}else b=this;return b});o("table()",function(a){var a=this.tables(a),b=a.context;return b.length?new s(b[0]):a});u("tables().nodes()","table().node()",function(){return this.iterator("table",function(a){return a.nTable},1)});u("tables().body()","table().body()",function(){return this.iterator("table",function(a){return a.nTBody},1)});u("tables().header()","table().header()",function(){return this.iterator("table",function(a){return a.nTHead},1)});u("tables().footer()",
|
||||
"table().footer()",function(){return this.iterator("table",function(a){return a.nTFoot},1)});u("tables().containers()","table().container()",function(){return this.iterator("table",function(a){return a.nTableWrapper},1)});o("draw()",function(a){return this.iterator("table",function(b){"page"===a?P(b):("string"===typeof a&&(a="full-hold"===a?!1:!0),T(b,!1===a))})});o("page()",function(a){return a===k?this.page.info().page:this.iterator("table",function(b){Ta(b,a)})});o("page.info()",function(){if(0===
|
||||
this.context.length)return k;var a=this.context[0],b=a._iDisplayStart,c=a.oFeatures.bPaginate?a._iDisplayLength:-1,d=a.fnRecordsDisplay(),e=-1===c;return{page:e?0:Math.floor(b/c),pages:e?1:Math.ceil(d/c),start:b,end:a.fnDisplayEnd(),length:c,recordsTotal:a.fnRecordsTotal(),recordsDisplay:d,serverSide:"ssp"===y(a)}});o("page.len()",function(a){return a===k?0!==this.context.length?this.context[0]._iDisplayLength:k:this.iterator("table",function(b){Ra(b,a)})});var Tb=function(a,b,c){if(c){var d=new s(a);
|
||||
d.one("draw",function(){c(d.ajax.json())})}if("ssp"==y(a))T(a,b);else{C(a,!0);var e=a.jqXHR;e&&4!==e.readyState&&e.abort();sa(a,[],function(c){oa(a);for(var c=ta(a,c),d=0,e=c.length;d<e;d++)O(a,c[d]);T(a,b);C(a,!1)})}};o("ajax.json()",function(){var a=this.context;if(0<a.length)return a[0].json});o("ajax.params()",function(){var a=this.context;if(0<a.length)return a[0].oAjaxData});o("ajax.reload()",function(a,b){return this.iterator("table",function(c){Tb(c,!1===b,a)})});o("ajax.url()",function(a){var b=
|
||||
this.context;if(a===k){if(0===b.length)return k;b=b[0];return b.ajax?h.isPlainObject(b.ajax)?b.ajax.url:b.ajax:b.sAjaxSource}return this.iterator("table",function(b){h.isPlainObject(b.ajax)?b.ajax.url=a:b.ajax=a})});o("ajax.url().load()",function(a,b){return this.iterator("table",function(c){Tb(c,!1===b,a)})});var ab=function(a,b,c,d,e){var f=[],g,j,i,m,l,n;i=typeof b;if(!b||"string"===i||"function"===i||b.length===k)b=[b];i=0;for(m=b.length;i<m;i++){j=b[i]&&b[i].split&&!b[i].match(/[\[\(:]/)?b[i].split(","):
|
||||
[b[i]];l=0;for(n=j.length;l<n;l++)(g=c("string"===typeof j[l]?h.trim(j[l]):j[l]))&&g.length&&(f=f.concat(g))}a=x.selector[a];if(a.length){i=0;for(m=a.length;i<m;i++)f=a[i](d,e,f)}return qa(f)},bb=function(a){a||(a={});a.filter&&a.search===k&&(a.search=a.filter);return h.extend({search:"none",order:"current",page:"all"},a)},cb=function(a){for(var b=0,c=a.length;b<c;b++)if(0<a[b].length)return a[0]=a[b],a[0].length=1,a.length=1,a.context=[a.context[b]],a;a.length=0;return a},Ba=function(a,b){var c,
|
||||
d,e,f=[],g=a.aiDisplay;e=a.aiDisplayMaster;var j=b.search;c=b.order;d=b.page;if("ssp"==y(a))return"removed"===j?[]:Y(0,e.length);if("current"==d){c=a._iDisplayStart;for(d=a.fnDisplayEnd();c<d;c++)f.push(g[c])}else if("current"==c||"applied"==c)if("none"==j)f=e.slice();else if("applied"==j)f=g.slice();else{if("removed"==j){var i={};c=0;for(d=g.length;c<d;c++)i[g[c]]=null;f=h.map(e,function(a){return!i.hasOwnProperty(a)?a:null})}}else if("index"==c||"original"==c){c=0;for(d=a.aoData.length;c<d;c++)"none"==
|
||||
j?f.push(c):(e=h.inArray(c,g),(-1===e&&"removed"==j||0<=e&&"applied"==j)&&f.push(c))}return f};o("rows()",function(a,b){a===k?a="":h.isPlainObject(a)&&(b=a,a="");var b=bb(b),c=this.iterator("table",function(c){var e=b,f;return ab("row",a,function(a){var b=Ob(a),i=c.aoData;if(b!==null&&!e)return[b];f||(f=Ba(c,e));if(b!==null&&h.inArray(b,f)!==-1)return[b];if(a===null||a===k||a==="")return f;if(typeof a==="function")return h.map(f,function(b){var c=i[b];return a(b,c._aData,c.nTr)?b:null});if(a.nodeName){var b=
|
||||
a._DT_RowIndex,m=a._DT_CellIndex;if(b!==k)return i[b]&&i[b].nTr===a?[b]:[];if(m)return i[m.row]&&i[m.row].nTr===a?[m.row]:[];b=h(a).closest("*[data-dt-row]");return b.length?[b.data("dt-row")]:[]}if(typeof a==="string"&&a.charAt(0)==="#"){b=c.aIds[a.replace(/^#/,"")];if(b!==k)return[b.idx]}b=Rb(ja(c.aoData,f,"nTr"));return h(b).filter(a).map(function(){return this._DT_RowIndex}).toArray()},c,e)},1);c.selector.rows=a;c.selector.opts=b;return c});o("rows().nodes()",function(){return this.iterator("row",
|
||||
function(a,b){return a.aoData[b].nTr||k},1)});o("rows().data()",function(){return this.iterator(!0,"rows",function(a,b){return ja(a.aoData,b,"_aData")},1)});u("rows().cache()","row().cache()",function(a){return this.iterator("row",function(b,c){var d=b.aoData[c];return"search"===a?d._aFilterData:d._aSortData},1)});u("rows().invalidate()","row().invalidate()",function(a){return this.iterator("row",function(b,c){da(b,c,a)})});u("rows().indexes()","row().index()",function(){return this.iterator("row",
|
||||
function(a,b){return b},1)});u("rows().ids()","row().id()",function(a){for(var b=[],c=this.context,d=0,e=c.length;d<e;d++)for(var f=0,g=this[d].length;f<g;f++){var h=c[d].rowIdFn(c[d].aoData[this[d][f]]._aData);b.push((!0===a?"#":"")+h)}return new s(c,b)});u("rows().remove()","row().remove()",function(){var a=this;this.iterator("row",function(b,c,d){var e=b.aoData,f=e[c],g,h,i,m,l;e.splice(c,1);g=0;for(h=e.length;g<h;g++)if(i=e[g],l=i.anCells,null!==i.nTr&&(i.nTr._DT_RowIndex=g),null!==l){i=0;for(m=
|
||||
l.length;i<m;i++)l[i]._DT_CellIndex.row=g}pa(b.aiDisplayMaster,c);pa(b.aiDisplay,c);pa(a[d],c,!1);0<b._iRecordsDisplay&&b._iRecordsDisplay--;Sa(b);c=b.rowIdFn(f._aData);c!==k&&delete b.aIds[c]});this.iterator("table",function(a){for(var c=0,d=a.aoData.length;c<d;c++)a.aoData[c].idx=c});return this});o("rows.add()",function(a){var b=this.iterator("table",function(b){var c,f,g,h=[];f=0;for(g=a.length;f<g;f++)c=a[f],c.nodeName&&"TR"===c.nodeName.toUpperCase()?h.push(na(b,c)[0]):h.push(O(b,c));return h},
|
||||
1),c=this.rows(-1);c.pop();h.merge(c,b);return c});o("row()",function(a,b){return cb(this.rows(a,b))});o("row().data()",function(a){var b=this.context;if(a===k)return b.length&&this.length?b[0].aoData[this[0]]._aData:k;var c=b[0].aoData[this[0]];c._aData=a;h.isArray(a)&&c.nTr.id&&N(b[0].rowId)(a,c.nTr.id);da(b[0],this[0],"data");return this});o("row().node()",function(){var a=this.context;return a.length&&this.length?a[0].aoData[this[0]].nTr||null:null});o("row.add()",function(a){a instanceof h&&
|
||||
a.length&&(a=a[0]);var b=this.iterator("table",function(b){return a.nodeName&&"TR"===a.nodeName.toUpperCase()?na(b,a)[0]:O(b,a)});return this.row(b[0])});var db=function(a,b){var c=a.context;if(c.length&&(c=c[0].aoData[b!==k?b:a[0]])&&c._details)c._details.remove(),c._detailsShow=k,c._details=k},Ub=function(a,b){var c=a.context;if(c.length&&a.length){var d=c[0].aoData[a[0]];if(d._details){(d._detailsShow=b)?d._details.insertAfter(d.nTr):d._details.detach();var e=c[0],f=new s(e),g=e.aoData;f.off("draw.dt.DT_details column-visibility.dt.DT_details destroy.dt.DT_details");
|
||||
0<D(g,"_details").length&&(f.on("draw.dt.DT_details",function(a,b){e===b&&f.rows({page:"current"}).eq(0).each(function(a){a=g[a];a._detailsShow&&a._details.insertAfter(a.nTr)})}),f.on("column-visibility.dt.DT_details",function(a,b){if(e===b)for(var c,d=V(b),f=0,h=g.length;f<h;f++)c=g[f],c._details&&c._details.children("td[colspan]").attr("colspan",d)}),f.on("destroy.dt.DT_details",function(a,b){if(e===b)for(var c=0,d=g.length;c<d;c++)g[c]._details&&db(f,c)}))}}};o("row().child()",function(a,b){var c=
|
||||
this.context;if(a===k)return c.length&&this.length?c[0].aoData[this[0]]._details:k;if(!0===a)this.child.show();else if(!1===a)db(this);else if(c.length&&this.length){var d=c[0],c=c[0].aoData[this[0]],e=[],f=function(a,b){if(h.isArray(a)||a instanceof h)for(var c=0,k=a.length;c<k;c++)f(a[c],b);else a.nodeName&&"tr"===a.nodeName.toLowerCase()?e.push(a):(c=h("<tr><td/></tr>").addClass(b),h("td",c).addClass(b).html(a)[0].colSpan=V(d),e.push(c[0]))};f(a,b);c._details&&c._details.detach();c._details=h(e);
|
||||
c._detailsShow&&c._details.insertAfter(c.nTr)}return this});o(["row().child.show()","row().child().show()"],function(){Ub(this,!0);return this});o(["row().child.hide()","row().child().hide()"],function(){Ub(this,!1);return this});o(["row().child.remove()","row().child().remove()"],function(){db(this);return this});o("row().child.isShown()",function(){var a=this.context;return a.length&&this.length?a[0].aoData[this[0]]._detailsShow||!1:!1});var bc=/^([^:]+):(name|visIdx|visible)$/,Vb=function(a,b,
|
||||
c,d,e){for(var c=[],d=0,f=e.length;d<f;d++)c.push(B(a,e[d],b));return c};o("columns()",function(a,b){a===k?a="":h.isPlainObject(a)&&(b=a,a="");var b=bb(b),c=this.iterator("table",function(c){var e=a,f=b,g=c.aoColumns,j=D(g,"sName"),i=D(g,"nTh");return ab("column",e,function(a){var b=Ob(a);if(a==="")return Y(g.length);if(b!==null)return[b>=0?b:g.length+b];if(typeof a==="function"){var e=Ba(c,f);return h.map(g,function(b,f){return a(f,Vb(c,f,0,0,e),i[f])?f:null})}var k=typeof a==="string"?a.match(bc):
|
||||
"";if(k)switch(k[2]){case "visIdx":case "visible":b=parseInt(k[1],10);if(b<0){var n=h.map(g,function(a,b){return a.bVisible?b:null});return[n[n.length+b]]}return[aa(c,b)];case "name":return h.map(j,function(a,b){return a===k[1]?b:null});default:return[]}if(a.nodeName&&a._DT_CellIndex)return[a._DT_CellIndex.column];b=h(i).filter(a).map(function(){return h.inArray(this,i)}).toArray();if(b.length||!a.nodeName)return b;b=h(a).closest("*[data-dt-column]");return b.length?[b.data("dt-column")]:[]},c,f)},
|
||||
1);c.selector.cols=a;c.selector.opts=b;return c});u("columns().header()","column().header()",function(){return this.iterator("column",function(a,b){return a.aoColumns[b].nTh},1)});u("columns().footer()","column().footer()",function(){return this.iterator("column",function(a,b){return a.aoColumns[b].nTf},1)});u("columns().data()","column().data()",function(){return this.iterator("column-rows",Vb,1)});u("columns().dataSrc()","column().dataSrc()",function(){return this.iterator("column",function(a,b){return a.aoColumns[b].mData},
|
||||
1)});u("columns().cache()","column().cache()",function(a){return this.iterator("column-rows",function(b,c,d,e,f){return ja(b.aoData,f,"search"===a?"_aFilterData":"_aSortData",c)},1)});u("columns().nodes()","column().nodes()",function(){return this.iterator("column-rows",function(a,b,c,d,e){return ja(a.aoData,e,"anCells",b)},1)});u("columns().visible()","column().visible()",function(a,b){var c=this.iterator("column",function(b,c){if(a===k)return b.aoColumns[c].bVisible;var f=b.aoColumns,g=f[c],j=b.aoData,
|
||||
i,m,l;if(a!==k&&g.bVisible!==a){if(a){var n=h.inArray(!0,D(f,"bVisible"),c+1);i=0;for(m=j.length;i<m;i++)l=j[i].nTr,f=j[i].anCells,l&&l.insertBefore(f[c],f[n]||null)}else h(D(b.aoData,"anCells",c)).detach();g.bVisible=a;fa(b,b.aoHeader);fa(b,b.aoFooter);b.aiDisplay.length||h(b.nTBody).find("td[colspan]").attr("colspan",V(b));xa(b)}});a!==k&&(this.iterator("column",function(c,e){r(c,null,"column-visibility",[c,e,a,b])}),(b===k||b)&&this.columns.adjust());return c});u("columns().indexes()","column().index()",
|
||||
function(a){return this.iterator("column",function(b,c){return"visible"===a?ba(b,c):c},1)});o("columns.adjust()",function(){return this.iterator("table",function(a){$(a)},1)});o("column.index()",function(a,b){if(0!==this.context.length){var c=this.context[0];if("fromVisible"===a||"toData"===a)return aa(c,b);if("fromData"===a||"toVisible"===a)return ba(c,b)}});o("column()",function(a,b){return cb(this.columns(a,b))});o("cells()",function(a,b,c){h.isPlainObject(a)&&(a.row===k?(c=a,a=null):(c=b,b=null));
|
||||
h.isPlainObject(b)&&(c=b,b=null);if(null===b||b===k)return this.iterator("table",function(b){var d=a,e=bb(c),f=b.aoData,g=Ba(b,e),j=Rb(ja(f,g,"anCells")),i=h([].concat.apply([],j)),l,m=b.aoColumns.length,n,o,u,s,r,v;return ab("cell",d,function(a){var c=typeof a==="function";if(a===null||a===k||c){n=[];o=0;for(u=g.length;o<u;o++){l=g[o];for(s=0;s<m;s++){r={row:l,column:s};if(c){v=f[l];a(r,B(b,l,s),v.anCells?v.anCells[s]:null)&&n.push(r)}else n.push(r)}}return n}if(h.isPlainObject(a))return a.column!==
|
||||
k&&a.row!==k&&h.inArray(a.row,g)!==-1?[a]:[];c=i.filter(a).map(function(a,b){return{row:b._DT_CellIndex.row,column:b._DT_CellIndex.column}}).toArray();if(c.length||!a.nodeName)return c;v=h(a).closest("*[data-dt-row]");return v.length?[{row:v.data("dt-row"),column:v.data("dt-column")}]:[]},b,e)});var d=this.columns(b),e=this.rows(a),f,g,j,i,m;this.iterator("table",function(a,b){f=[];g=0;for(j=e[b].length;g<j;g++){i=0;for(m=d[b].length;i<m;i++)f.push({row:e[b][g],column:d[b][i]})}},1);var l=this.cells(f,
|
||||
c);h.extend(l.selector,{cols:b,rows:a,opts:c});return l});u("cells().nodes()","cell().node()",function(){return this.iterator("cell",function(a,b,c){return(a=a.aoData[b])&&a.anCells?a.anCells[c]:k},1)});o("cells().data()",function(){return this.iterator("cell",function(a,b,c){return B(a,b,c)},1)});u("cells().cache()","cell().cache()",function(a){a="search"===a?"_aFilterData":"_aSortData";return this.iterator("cell",function(b,c,d){return b.aoData[c][a][d]},1)});u("cells().render()","cell().render()",
|
||||
function(a){return this.iterator("cell",function(b,c,d){return B(b,c,d,a)},1)});u("cells().indexes()","cell().index()",function(){return this.iterator("cell",function(a,b,c){return{row:b,column:c,columnVisible:ba(a,c)}},1)});u("cells().invalidate()","cell().invalidate()",function(a){return this.iterator("cell",function(b,c,d){da(b,c,a,d)})});o("cell()",function(a,b,c){return cb(this.cells(a,b,c))});o("cell().data()",function(a){var b=this.context,c=this[0];if(a===k)return b.length&&c.length?B(b[0],
|
||||
c[0].row,c[0].column):k;kb(b[0],c[0].row,c[0].column,a);da(b[0],c[0].row,"data",c[0].column);return this});o("order()",function(a,b){var c=this.context;if(a===k)return 0!==c.length?c[0].aaSorting:k;"number"===typeof a?a=[[a,b]]:a.length&&!h.isArray(a[0])&&(a=Array.prototype.slice.call(arguments));return this.iterator("table",function(b){b.aaSorting=a.slice()})});o("order.listener()",function(a,b,c){return this.iterator("table",function(d){Ma(d,a,b,c)})});o("order.fixed()",function(a){if(!a){var b=
|
||||
this.context,b=b.length?b[0].aaSortingFixed:k;return h.isArray(b)?{pre:b}:b}return this.iterator("table",function(b){b.aaSortingFixed=h.extend(!0,{},a)})});o(["columns().order()","column().order()"],function(a){var b=this;return this.iterator("table",function(c,d){var e=[];h.each(b[d],function(b,c){e.push([c,a])});c.aaSorting=e})});o("search()",function(a,b,c,d){var e=this.context;return a===k?0!==e.length?e[0].oPreviousSearch.sSearch:k:this.iterator("table",function(e){e.oFeatures.bFilter&&ga(e,
|
||||
h.extend({},e.oPreviousSearch,{sSearch:a+"",bRegex:null===b?!1:b,bSmart:null===c?!0:c,bCaseInsensitive:null===d?!0:d}),1)})});u("columns().search()","column().search()",function(a,b,c,d){return this.iterator("column",function(e,f){var g=e.aoPreSearchCols;if(a===k)return g[f].sSearch;e.oFeatures.bFilter&&(h.extend(g[f],{sSearch:a+"",bRegex:null===b?!1:b,bSmart:null===c?!0:c,bCaseInsensitive:null===d?!0:d}),ga(e,e.oPreviousSearch,1))})});o("state()",function(){return this.context.length?this.context[0].oSavedState:
|
||||
null});o("state.clear()",function(){return this.iterator("table",function(a){a.fnStateSaveCallback.call(a.oInstance,a,{})})});o("state.loaded()",function(){return this.context.length?this.context[0].oLoadedState:null});o("state.save()",function(){return this.iterator("table",function(a){xa(a)})});n.versionCheck=n.fnVersionCheck=function(a){for(var b=n.version.split("."),a=a.split("."),c,d,e=0,f=a.length;e<f;e++)if(c=parseInt(b[e],10)||0,d=parseInt(a[e],10)||0,c!==d)return c>d;return!0};n.isDataTable=
|
||||
n.fnIsDataTable=function(a){var b=h(a).get(0),c=!1;if(a instanceof n.Api)return!0;h.each(n.settings,function(a,e){var f=e.nScrollHead?h("table",e.nScrollHead)[0]:null,g=e.nScrollFoot?h("table",e.nScrollFoot)[0]:null;if(e.nTable===b||f===b||g===b)c=!0});return c};n.tables=n.fnTables=function(a){var b=!1;h.isPlainObject(a)&&(b=a.api,a=a.visible);var c=h.map(n.settings,function(b){if(!a||a&&h(b.nTable).is(":visible"))return b.nTable});return b?new s(c):c};n.camelToHungarian=J;o("$()",function(a,b){var c=
|
||||
this.rows(b).nodes(),c=h(c);return h([].concat(c.filter(a).toArray(),c.find(a).toArray()))});h.each(["on","one","off"],function(a,b){o(b+"()",function(){var a=Array.prototype.slice.call(arguments);a[0]=h.map(a[0].split(/\s/),function(a){return!a.match(/\.dt\b/)?a+".dt":a}).join(" ");var d=h(this.tables().nodes());d[b].apply(d,a);return this})});o("clear()",function(){return this.iterator("table",function(a){oa(a)})});o("settings()",function(){return new s(this.context,this.context)});o("init()",function(){var a=
|
||||
this.context;return a.length?a[0].oInit:null});o("data()",function(){return this.iterator("table",function(a){return D(a.aoData,"_aData")}).flatten()});o("destroy()",function(a){a=a||!1;return this.iterator("table",function(b){var c=b.nTableWrapper.parentNode,d=b.oClasses,e=b.nTable,f=b.nTBody,g=b.nTHead,j=b.nTFoot,i=h(e),f=h(f),k=h(b.nTableWrapper),l=h.map(b.aoData,function(a){return a.nTr}),o;b.bDestroying=!0;r(b,"aoDestroyCallback","destroy",[b]);a||(new s(b)).columns().visible(!0);k.off(".DT").find(":not(tbody *)").off(".DT");
|
||||
h(E).off(".DT-"+b.sInstance);e!=g.parentNode&&(i.children("thead").detach(),i.append(g));j&&e!=j.parentNode&&(i.children("tfoot").detach(),i.append(j));b.aaSorting=[];b.aaSortingFixed=[];wa(b);h(l).removeClass(b.asStripeClasses.join(" "));h("th, td",g).removeClass(d.sSortable+" "+d.sSortableAsc+" "+d.sSortableDesc+" "+d.sSortableNone);f.children().detach();f.append(l);g=a?"remove":"detach";i[g]();k[g]();!a&&c&&(c.insertBefore(e,b.nTableReinsertBefore),i.css("width",b.sDestroyWidth).removeClass(d.sTable),
|
||||
(o=b.asDestroyStripes.length)&&f.children().each(function(a){h(this).addClass(b.asDestroyStripes[a%o])}));c=h.inArray(b,n.settings);-1!==c&&n.settings.splice(c,1)})});h.each(["column","row","cell"],function(a,b){o(b+"s().every()",function(a){var d=this.selector.opts,e=this;return this.iterator(b,function(f,g,h,i,m){a.call(e[b](g,"cell"===b?h:d,"cell"===b?d:k),g,h,i,m)})})});o("i18n()",function(a,b,c){var d=this.context[0],a=S(a)(d.oLanguage);a===k&&(a=b);c!==k&&h.isPlainObject(a)&&(a=a[c]!==k?a[c]:
|
||||
a._);return a.replace("%d",c)});n.version="1.10.19";n.settings=[];n.models={};n.models.oSearch={bCaseInsensitive:!0,sSearch:"",bRegex:!1,bSmart:!0};n.models.oRow={nTr:null,anCells:null,_aData:[],_aSortData:null,_aFilterData:null,_sFilterRow:null,_sRowStripe:"",src:null,idx:-1};n.models.oColumn={idx:null,aDataSort:null,asSorting:null,bSearchable:null,bSortable:null,bVisible:null,_sManualType:null,_bAttrSrc:!1,fnCreatedCell:null,fnGetData:null,fnSetData:null,mData:null,mRender:null,nTh:null,nTf:null,
|
||||
sClass:null,sContentPadding:null,sDefaultContent:null,sName:null,sSortDataType:"std",sSortingClass:null,sSortingClassJUI:null,sTitle:null,sType:null,sWidth:null,sWidthOrig:null};n.defaults={aaData:null,aaSorting:[[0,"asc"]],aaSortingFixed:[],ajax:null,aLengthMenu:[10,25,50,100],aoColumns:null,aoColumnDefs:null,aoSearchCols:[],asStripeClasses:null,bAutoWidth:!0,bDeferRender:!1,bDestroy:!1,bFilter:!0,bInfo:!0,bLengthChange:!0,bPaginate:!0,bProcessing:!1,bRetrieve:!1,bScrollCollapse:!1,bServerSide:!1,
|
||||
bSort:!0,bSortMulti:!0,bSortCellsTop:!1,bSortClasses:!0,bStateSave:!1,fnCreatedRow:null,fnDrawCallback:null,fnFooterCallback:null,fnFormatNumber:function(a){return a.toString().replace(/\B(?=(\d{3})+(?!\d))/g,this.oLanguage.sThousands)},fnHeaderCallback:null,fnInfoCallback:null,fnInitComplete:null,fnPreDrawCallback:null,fnRowCallback:null,fnServerData:null,fnServerParams:null,fnStateLoadCallback:function(a){try{return JSON.parse((-1===a.iStateDuration?sessionStorage:localStorage).getItem("DataTables_"+
|
||||
a.sInstance+"_"+location.pathname))}catch(b){}},fnStateLoadParams:null,fnStateLoaded:null,fnStateSaveCallback:function(a,b){try{(-1===a.iStateDuration?sessionStorage:localStorage).setItem("DataTables_"+a.sInstance+"_"+location.pathname,JSON.stringify(b))}catch(c){}},fnStateSaveParams:null,iStateDuration:7200,iDeferLoading:null,iDisplayLength:10,iDisplayStart:0,iTabIndex:0,oClasses:{},oLanguage:{oAria:{sSortAscending:": activate to sort column ascending",sSortDescending:": activate to sort column descending"},
|
||||
oPaginate:{sFirst:"First",sLast:"Last",sNext:"Next",sPrevious:"Previous"},sEmptyTable:"No data available in table",sInfo:"Showing _START_ to _END_ of _TOTAL_ entries",sInfoEmpty:"Showing 0 to 0 of 0 entries",sInfoFiltered:"(filtered from _MAX_ total entries)",sInfoPostFix:"",sDecimal:"",sThousands:",",sLengthMenu:"Show _MENU_ entries",sLoadingRecords:"Loading...",sProcessing:"Processing...",sSearch:"Search:",sSearchPlaceholder:"",sUrl:"",sZeroRecords:"No matching records found"},oSearch:h.extend({},
|
||||
n.models.oSearch),sAjaxDataProp:"data",sAjaxSource:null,sDom:"lfrtip",searchDelay:null,sPaginationType:"simple_numbers",sScrollX:"",sScrollXInner:"",sScrollY:"",sServerMethod:"GET",renderer:null,rowId:"DT_RowId"};Z(n.defaults);n.defaults.column={aDataSort:null,iDataSort:-1,asSorting:["asc","desc"],bSearchable:!0,bSortable:!0,bVisible:!0,fnCreatedCell:null,mData:null,mRender:null,sCellType:"td",sClass:"",sContentPadding:"",sDefaultContent:null,sName:"",sSortDataType:"std",sTitle:null,sType:null,sWidth:null};
|
||||
Z(n.defaults.column);n.models.oSettings={oFeatures:{bAutoWidth:null,bDeferRender:null,bFilter:null,bInfo:null,bLengthChange:null,bPaginate:null,bProcessing:null,bServerSide:null,bSort:null,bSortMulti:null,bSortClasses:null,bStateSave:null},oScroll:{bCollapse:null,iBarWidth:0,sX:null,sXInner:null,sY:null},oLanguage:{fnInfoCallback:null},oBrowser:{bScrollOversize:!1,bScrollbarLeft:!1,bBounding:!1,barWidth:0},ajax:null,aanFeatures:[],aoData:[],aiDisplay:[],aiDisplayMaster:[],aIds:{},aoColumns:[],aoHeader:[],
|
||||
aoFooter:[],oPreviousSearch:{},aoPreSearchCols:[],aaSorting:null,aaSortingFixed:[],asStripeClasses:null,asDestroyStripes:[],sDestroyWidth:0,aoRowCallback:[],aoHeaderCallback:[],aoFooterCallback:[],aoDrawCallback:[],aoRowCreatedCallback:[],aoPreDrawCallback:[],aoInitComplete:[],aoStateSaveParams:[],aoStateLoadParams:[],aoStateLoaded:[],sTableId:"",nTable:null,nTHead:null,nTFoot:null,nTBody:null,nTableWrapper:null,bDeferLoading:!1,bInitialised:!1,aoOpenRows:[],sDom:null,searchDelay:null,sPaginationType:"two_button",
|
||||
iStateDuration:0,aoStateSave:[],aoStateLoad:[],oSavedState:null,oLoadedState:null,sAjaxSource:null,sAjaxDataProp:null,bAjaxDataGet:!0,jqXHR:null,json:k,oAjaxData:k,fnServerData:null,aoServerParams:[],sServerMethod:null,fnFormatNumber:null,aLengthMenu:null,iDraw:0,bDrawing:!1,iDrawError:-1,_iDisplayLength:10,_iDisplayStart:0,_iRecordsTotal:0,_iRecordsDisplay:0,oClasses:{},bFiltered:!1,bSorted:!1,bSortCellsTop:null,oInit:null,aoDestroyCallback:[],fnRecordsTotal:function(){return"ssp"==y(this)?1*this._iRecordsTotal:
|
||||
this.aiDisplayMaster.length},fnRecordsDisplay:function(){return"ssp"==y(this)?1*this._iRecordsDisplay:this.aiDisplay.length},fnDisplayEnd:function(){var a=this._iDisplayLength,b=this._iDisplayStart,c=b+a,d=this.aiDisplay.length,e=this.oFeatures,f=e.bPaginate;return e.bServerSide?!1===f||-1===a?b+d:Math.min(b+a,this._iRecordsDisplay):!f||c>d||-1===a?d:c},oInstance:null,sInstance:null,iTabIndex:0,nScrollHead:null,nScrollFoot:null,aLastSort:[],oPlugins:{},rowIdFn:null,rowId:null};n.ext=x={buttons:{},
|
||||
classes:{},builder:"-source-",errMode:"alert",feature:[],search:[],selector:{cell:[],column:[],row:[]},internal:{},legacy:{ajax:null},pager:{},renderer:{pageButton:{},header:{}},order:{},type:{detect:[],search:{},order:{}},_unique:0,fnVersionCheck:n.fnVersionCheck,iApiIndex:0,oJUIClasses:{},sVersion:n.version};h.extend(x,{afnFiltering:x.search,aTypes:x.type.detect,ofnSearch:x.type.search,oSort:x.type.order,afnSortData:x.order,aoFeatures:x.feature,oApi:x.internal,oStdClasses:x.classes,oPagination:x.pager});
|
||||
h.extend(n.ext.classes,{sTable:"dataTable",sNoFooter:"no-footer",sPageButton:"paginate_button",sPageButtonActive:"current",sPageButtonDisabled:"disabled",sStripeOdd:"odd",sStripeEven:"even",sRowEmpty:"dataTables_empty",sWrapper:"dataTables_wrapper",sFilter:"dataTables_filter",sInfo:"dataTables_info",sPaging:"dataTables_paginate paging_",sLength:"dataTables_length",sProcessing:"dataTables_processing",sSortAsc:"sorting_asc",sSortDesc:"sorting_desc",sSortable:"sorting",sSortableAsc:"sorting_asc_disabled",
|
||||
sSortableDesc:"sorting_desc_disabled",sSortableNone:"sorting_disabled",sSortColumn:"sorting_",sFilterInput:"",sLengthSelect:"",sScrollWrapper:"dataTables_scroll",sScrollHead:"dataTables_scrollHead",sScrollHeadInner:"dataTables_scrollHeadInner",sScrollBody:"dataTables_scrollBody",sScrollFoot:"dataTables_scrollFoot",sScrollFootInner:"dataTables_scrollFootInner",sHeaderTH:"",sFooterTH:"",sSortJUIAsc:"",sSortJUIDesc:"",sSortJUI:"",sSortJUIAscAllowed:"",sSortJUIDescAllowed:"",sSortJUIWrapper:"",sSortIcon:"",
|
||||
sJUIHeader:"",sJUIFooter:""});var Lb=n.ext.pager;h.extend(Lb,{simple:function(){return["previous","next"]},full:function(){return["first","previous","next","last"]},numbers:function(a,b){return[ia(a,b)]},simple_numbers:function(a,b){return["previous",ia(a,b),"next"]},full_numbers:function(a,b){return["first","previous",ia(a,b),"next","last"]},first_last_numbers:function(a,b){return["first",ia(a,b),"last"]},_numbers:ia,numbers_length:7});h.extend(!0,n.ext.renderer,{pageButton:{_:function(a,b,c,d,e,
|
||||
f){var g=a.oClasses,j=a.oLanguage.oPaginate,i=a.oLanguage.oAria.paginate||{},m,l,n=0,o=function(b,d){var k,s,u,r,v=function(b){Ta(a,b.data.action,true)};k=0;for(s=d.length;k<s;k++){r=d[k];if(h.isArray(r)){u=h("<"+(r.DT_el||"div")+"/>").appendTo(b);o(u,r)}else{m=null;l="";switch(r){case "ellipsis":b.append('<span class="ellipsis">…</span>');break;case "first":m=j.sFirst;l=r+(e>0?"":" "+g.sPageButtonDisabled);break;case "previous":m=j.sPrevious;l=r+(e>0?"":" "+g.sPageButtonDisabled);break;case "next":m=
|
||||
j.sNext;l=r+(e<f-1?"":" "+g.sPageButtonDisabled);break;case "last":m=j.sLast;l=r+(e<f-1?"":" "+g.sPageButtonDisabled);break;default:m=r+1;l=e===r?g.sPageButtonActive:""}if(m!==null){u=h("<a>",{"class":g.sPageButton+" "+l,"aria-controls":a.sTableId,"aria-label":i[r],"data-dt-idx":n,tabindex:a.iTabIndex,id:c===0&&typeof r==="string"?a.sTableId+"_"+r:null}).html(m).appendTo(b);Wa(u,{action:r},v);n++}}}},s;try{s=h(b).find(H.activeElement).data("dt-idx")}catch(u){}o(h(b).empty(),d);s!==k&&h(b).find("[data-dt-idx="+
|
||||
s+"]").focus()}}});h.extend(n.ext.type.detect,[function(a,b){var c=b.oLanguage.sDecimal;return $a(a,c)?"num"+c:null},function(a){if(a&&!(a instanceof Date)&&!Zb.test(a))return null;var b=Date.parse(a);return null!==b&&!isNaN(b)||M(a)?"date":null},function(a,b){var c=b.oLanguage.sDecimal;return $a(a,c,!0)?"num-fmt"+c:null},function(a,b){var c=b.oLanguage.sDecimal;return Qb(a,c)?"html-num"+c:null},function(a,b){var c=b.oLanguage.sDecimal;return Qb(a,c,!0)?"html-num-fmt"+c:null},function(a){return M(a)||
|
||||
"string"===typeof a&&-1!==a.indexOf("<")?"html":null}]);h.extend(n.ext.type.search,{html:function(a){return M(a)?a:"string"===typeof a?a.replace(Nb," ").replace(Aa,""):""},string:function(a){return M(a)?a:"string"===typeof a?a.replace(Nb," "):a}});var za=function(a,b,c,d){if(0!==a&&(!a||"-"===a))return-Infinity;b&&(a=Pb(a,b));a.replace&&(c&&(a=a.replace(c,"")),d&&(a=a.replace(d,"")));return 1*a};h.extend(x.type.order,{"date-pre":function(a){a=Date.parse(a);return isNaN(a)?-Infinity:a},"html-pre":function(a){return M(a)?
|
||||
"":a.replace?a.replace(/<.*?>/g,"").toLowerCase():a+""},"string-pre":function(a){return M(a)?"":"string"===typeof a?a.toLowerCase():!a.toString?"":a.toString()},"string-asc":function(a,b){return a<b?-1:a>b?1:0},"string-desc":function(a,b){return a<b?1:a>b?-1:0}});Da("");h.extend(!0,n.ext.renderer,{header:{_:function(a,b,c,d){h(a.nTable).on("order.dt.DT",function(e,f,g,h){if(a===f){e=c.idx;b.removeClass(c.sSortingClass+" "+d.sSortAsc+" "+d.sSortDesc).addClass(h[e]=="asc"?d.sSortAsc:h[e]=="desc"?d.sSortDesc:
|
||||
c.sSortingClass)}})},jqueryui:function(a,b,c,d){h("<div/>").addClass(d.sSortJUIWrapper).append(b.contents()).append(h("<span/>").addClass(d.sSortIcon+" "+c.sSortingClassJUI)).appendTo(b);h(a.nTable).on("order.dt.DT",function(e,f,g,h){if(a===f){e=c.idx;b.removeClass(d.sSortAsc+" "+d.sSortDesc).addClass(h[e]=="asc"?d.sSortAsc:h[e]=="desc"?d.sSortDesc:c.sSortingClass);b.find("span."+d.sSortIcon).removeClass(d.sSortJUIAsc+" "+d.sSortJUIDesc+" "+d.sSortJUI+" "+d.sSortJUIAscAllowed+" "+d.sSortJUIDescAllowed).addClass(h[e]==
|
||||
"asc"?d.sSortJUIAsc:h[e]=="desc"?d.sSortJUIDesc:c.sSortingClassJUI)}})}}});var eb=function(a){return"string"===typeof a?a.replace(/</g,"<").replace(/>/g,">").replace(/"/g,"""):a};n.render={number:function(a,b,c,d,e){return{display:function(f){if("number"!==typeof f&&"string"!==typeof f)return f;var g=0>f?"-":"",h=parseFloat(f);if(isNaN(h))return eb(f);h=h.toFixed(c);f=Math.abs(h);h=parseInt(f,10);f=c?b+(f-h).toFixed(c).substring(2):"";return g+(d||"")+h.toString().replace(/\B(?=(\d{3})+(?!\d))/g,
|
||||
a)+f+(e||"")}}},text:function(){return{display:eb,filter:eb}}};h.extend(n.ext.internal,{_fnExternApiFunc:Mb,_fnBuildAjax:sa,_fnAjaxUpdate:mb,_fnAjaxParameters:vb,_fnAjaxUpdateDraw:wb,_fnAjaxDataSrc:ta,_fnAddColumn:Ea,_fnColumnOptions:ka,_fnAdjustColumnSizing:$,_fnVisibleToColumnIndex:aa,_fnColumnIndexToVisible:ba,_fnVisbleColumns:V,_fnGetColumns:ma,_fnColumnTypes:Ga,_fnApplyColumnDefs:jb,_fnHungarianMap:Z,_fnCamelToHungarian:J,_fnLanguageCompat:Ca,_fnBrowserDetect:hb,_fnAddData:O,_fnAddTr:na,_fnNodeToDataIndex:function(a,
|
||||
b){return b._DT_RowIndex!==k?b._DT_RowIndex:null},_fnNodeToColumnIndex:function(a,b,c){return h.inArray(c,a.aoData[b].anCells)},_fnGetCellData:B,_fnSetCellData:kb,_fnSplitObjNotation:Ja,_fnGetObjectDataFn:S,_fnSetObjectDataFn:N,_fnGetDataMaster:Ka,_fnClearTable:oa,_fnDeleteIndex:pa,_fnInvalidate:da,_fnGetRowElements:Ia,_fnCreateTr:Ha,_fnBuildHead:lb,_fnDrawHead:fa,_fnDraw:P,_fnReDraw:T,_fnAddOptionsHtml:ob,_fnDetectHeader:ea,_fnGetUniqueThs:ra,_fnFeatureHtmlFilter:qb,_fnFilterComplete:ga,_fnFilterCustom:zb,
|
||||
_fnFilterColumn:yb,_fnFilter:xb,_fnFilterCreateSearch:Pa,_fnEscapeRegex:Qa,_fnFilterData:Ab,_fnFeatureHtmlInfo:tb,_fnUpdateInfo:Db,_fnInfoMacros:Eb,_fnInitialise:ha,_fnInitComplete:ua,_fnLengthChange:Ra,_fnFeatureHtmlLength:pb,_fnFeatureHtmlPaginate:ub,_fnPageChange:Ta,_fnFeatureHtmlProcessing:rb,_fnProcessingDisplay:C,_fnFeatureHtmlTable:sb,_fnScrollDraw:la,_fnApplyToChildren:I,_fnCalculateColumnWidths:Fa,_fnThrottle:Oa,_fnConvertToWidth:Fb,_fnGetWidestNode:Gb,_fnGetMaxLenString:Hb,_fnStringToCss:v,
|
||||
_fnSortFlatten:X,_fnSort:nb,_fnSortAria:Jb,_fnSortListener:Va,_fnSortAttachListener:Ma,_fnSortingClasses:wa,_fnSortData:Ib,_fnSaveState:xa,_fnLoadState:Kb,_fnSettingsFromNode:ya,_fnLog:K,_fnMap:F,_fnBindAction:Wa,_fnCallbackReg:z,_fnCallbackFire:r,_fnLengthOverflow:Sa,_fnRenderer:Na,_fnDataSource:y,_fnRowAttributes:La,_fnExtend:Xa,_fnCalculateEnd:function(){}});h.fn.dataTable=n;n.$=h;h.fn.dataTableSettings=n.settings;h.fn.dataTableExt=n.ext;h.fn.DataTable=function(a){return h(this).dataTable(a).api()};
|
||||
h.each(n,function(a,b){h.fn.DataTable[a]=b});return h.fn.dataTable});
|
Before Width: | Height: | Size: 158 B |
Before Width: | Height: | Size: 201 B |
Before Width: | Height: | Size: 157 B |
Before Width: | Height: | Size: 11 KiB |
|
@ -1,11 +1,11 @@
|
|||
__package__ = 'archivebox'
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
import requests
|
||||
import json as pyjson
|
||||
|
||||
|
||||
from typing import List, Optional, Any
|
||||
from pathlib import Path
|
||||
from inspect import signature
|
||||
from functools import wraps
|
||||
from hashlib import sha256
|
||||
|
@ -13,10 +13,9 @@ from urllib.parse import urlparse, quote, unquote
|
|||
from html import escape, unescape
|
||||
from datetime import datetime
|
||||
from dateparser import parse as dateparser
|
||||
|
||||
import requests
|
||||
from requests.exceptions import RequestException, ReadTimeout
|
||||
from .base32_crockford import encode as base32_encode # type: ignore
|
||||
|
||||
from .vendor.base32_crockford import encode as base32_encode # type: ignore
|
||||
from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
|
||||
|
||||
try:
|
||||
|
@ -246,6 +245,7 @@ def chrome_args(**options) -> List[str]:
|
|||
|
||||
return cmd_args
|
||||
|
||||
|
||||
def ansi_to_html(text):
|
||||
"""
|
||||
Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html
|
||||
|
|
0
archivebox/vendor/__init__.py
vendored
Normal file
1
archivebox/vendor/base32-crockford
vendored
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit 1ffb6021485b666ea6a562abd0a1ea6f7021188f
|
1
archivebox/vendor/base32_crockford.py
vendored
Symbolic link
|
@ -0,0 +1 @@
|
|||
base32-crockford/base32_crockford.py
|
1
archivebox/vendor/django-taggit
vendored
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit 1e4dca37e534ca70e99c39fb4198970eb8aad5aa
|
1
archivebox/vendor/pocket
vendored
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit 3a0c5c76832b0e92923383af3f9831ece7901c2f
|
1
archivebox/vendor/pocket.py
vendored
Symbolic link
|
@ -0,0 +1 @@
|
|||
pocket/pocket.py
|
1
archivebox/vendor/taggit_utils.py
vendored
Symbolic link
|
@ -0,0 +1 @@
|
|||
django-taggit/taggit/utils.py
|
|
@ -16,9 +16,11 @@ cd "$REPO_DIR"
|
|||
|
||||
# pipenv install --dev
|
||||
|
||||
# the order matters
|
||||
./bin/build_docs.sh
|
||||
./bin/build_pip.sh
|
||||
./bin/build_deb.sh
|
||||
./bin/build_brew.sh
|
||||
./bin/build_docker.sh
|
||||
|
||||
echo "[√] Done. Install the built package by running:"
|
||||
|
|
29
bin/build_brew.sh
Executable file
|
@ -0,0 +1,29 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
### Bash Environment Setup
|
||||
# http://redsymbol.net/articles/unofficial-bash-strict-mode/
|
||||
# https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html
|
||||
# set -o xtrace
|
||||
set -o errexit
|
||||
set -o errtrace
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
IFS=$'\n'
|
||||
|
||||
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
|
||||
|
||||
|
||||
CURRENT_PLAFORM="$(uname)"
|
||||
REQUIRED_PLATFORM="Darwin"
|
||||
if [[ "$CURRENT_PLAFORM" != "$REQUIRED_PLATFORM" ]]; then
|
||||
echo "[!] Skipping the Homebrew package build on $CURRENT_PLAFORM (it can only be run on $REQUIRED_PLATFORM)."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
|
||||
cd "$REPO_DIR/brew_dist"
|
||||
# make sure archivebox.rb is up-to-date with the dependencies
|
||||
|
||||
echo "[+] Building Homebrew bottle"
|
||||
brew install --build-bottle ./archivebox.rb
|
||||
brew bottle archivebox
|
|
@ -12,9 +12,20 @@ IFS=$'\n'
|
|||
|
||||
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
|
||||
|
||||
source "$REPO_DIR/.venv/bin/activate"
|
||||
if [[ -f "$REPO_DIR/.venv/bin/activate" ]]; then
|
||||
source "$REPO_DIR/.venv/bin/activate"
|
||||
else
|
||||
echo "[!] Warning: No virtualenv presesnt in $REPO_DIR.venv"
|
||||
fi
|
||||
cd "$REPO_DIR"
|
||||
|
||||
CURRENT_PLAFORM="$(uname)"
|
||||
REQUIRED_PLATFORM="Linux"
|
||||
if [[ "$CURRENT_PLAFORM" != "$REQUIRED_PLATFORM" ]]; then
|
||||
echo "[!] Skipping the Debian package build on $CURRENT_PLAFORM (it can only be run on $REQUIRED_PLATFORM)."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
|
||||
DEBIAN_VERSION="1"
|
||||
PGP_KEY_ID="7D5695D3B618872647861D51C38137A7C1675988"
|
||||
|
@ -30,6 +41,8 @@ PGP_KEY_ID="7D5695D3B618872647861D51C38137A7C1675988"
|
|||
# cleanup build artifacts
|
||||
rm -Rf build deb_dist dist archivebox-*.tar.gz
|
||||
|
||||
# make sure the stdeb.cfg file is up-to-date with all the dependencies
|
||||
|
||||
# build source and binary packages
|
||||
python3 setup.py --command-packages=stdeb.command \
|
||||
sdist_dsc --debian-version=$DEBIAN_VERSION \
|
||||
|
|
|
@ -14,6 +14,7 @@ REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && p
|
|||
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
|
||||
cd "$REPO_DIR"
|
||||
|
||||
which docker > /dev/null
|
||||
|
||||
echo "[+] Building docker image in the background..."
|
||||
docker build . -t archivebox \
|
||||
|
|
|
@ -12,7 +12,11 @@ IFS=$'\n'
|
|||
|
||||
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
|
||||
|
||||
source "$REPO_DIR/.venv/bin/activate"
|
||||
if [[ -f "$REPO_DIR/.venv/bin/activate" ]]; then
|
||||
source "$REPO_DIR/.venv/bin/activate"
|
||||
else
|
||||
echo "[!] Warning: No virtualenv presesnt in $REPO_DIR.venv"
|
||||
fi
|
||||
cd "$REPO_DIR"
|
||||
|
||||
|
||||
|
|
|
@ -12,13 +12,20 @@ IFS=$'\n'
|
|||
|
||||
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
|
||||
|
||||
source "$REPO_DIR/.venv/bin/activate"
|
||||
if [[ -f "$REPO_DIR/.venv/bin/activate" ]]; then
|
||||
source "$REPO_DIR/.venv/bin/activate"
|
||||
else
|
||||
echo "[!] Warning: No virtualenv present in $REPO_DIR/.venv"
|
||||
fi
|
||||
cd "$REPO_DIR"
|
||||
|
||||
|
||||
echo "[*] Cleaning up build dirs"
|
||||
cd "$REPO_DIR"
|
||||
rm -Rf build dist archivebox.egg-info
|
||||
rm -Rf build dist
|
||||
|
||||
echo "[+] Building sdist, bdist_egg, and bdist_wheel"
|
||||
python3 setup.py sdist bdist_egg bdist_wheel
|
||||
echo "[+] Building sdist, bdist_wheel, and egg_info"
|
||||
python3 setup.py \
|
||||
sdist --dist-dir=./pip_dist \
|
||||
bdist_wheel --dist-dir=./pip_dist \
|
||||
egg_info --egg-base=./pip_dist
|
||||
|
|
|
@ -48,6 +48,7 @@ echo "${contents}" > package.json
|
|||
# Push build to github
|
||||
echo "[^] Pushing source to github"
|
||||
git add "$REPO_DIR/docs"
|
||||
git add "$REPO_DIR/deb_dist"
|
||||
git add "$REPO_DIR/package.json"
|
||||
git add "$REPO_DIR/package-lock.json"
|
||||
git add "$REPO_DIR/archivebox.egg-info"
|
||||
|
@ -59,10 +60,10 @@ git push origin --tags
|
|||
|
||||
# Push releases to github
|
||||
echo "[^] Uploading to test.pypi.org"
|
||||
python3 -m twine upload --repository testpypi dist/*
|
||||
python3 -m twine upload --repository testpypi pip_dist/*.{whl,tar.gz}
|
||||
|
||||
echo "[^] Uploading to pypi.org"
|
||||
python3 -m twine upload --repository pypi dist/*
|
||||
python3 -m twine upload --repository pypi pip_dist/*.{whl,tar.gz}
|
||||
|
||||
echo "[^] Uploading to launchpad.net"
|
||||
dput archivebox "deb_dist/archivebox_${NEW_VERSION}-1_source.changes"
|
||||
|
|
|
@ -14,4 +14,4 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
|
|||
|
||||
source "$DIR/.venv/bin/activate"
|
||||
|
||||
pytest -s
|
||||
pytest -s --basetemp=tests/out --ignore=archivebox/vendor
|
||||
|
|
1
brew_dist
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit 55f57fcc0e5e7d0b1c0b93cef537cc97936b2848
|
1
deb_dist
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit cd7f47d48e487c5192670cd5b68042d41b05d281
|
1
docker
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit 236f7881e3105b218864d9b3185b17c44b306106
|
|
@ -21,9 +21,23 @@ services:
|
|||
environment:
|
||||
- USE_COLOR=True
|
||||
- SHOW_PROGRESS=False
|
||||
- SEARCH_BACKEND_ENGINE=sonic
|
||||
- SEARCH_BACKEND_HOST_NAME=sonic
|
||||
volumes:
|
||||
- ./data:/data
|
||||
|
||||
depends_on:
|
||||
- sonic
|
||||
|
||||
# Run sonic search backend
|
||||
sonic:
|
||||
image: valeriansaliou/sonic:v1.3.0
|
||||
ports:
|
||||
- 1491:1491
|
||||
environment:
|
||||
- SEARCH_BACKEND_PASSWORD=SecretPassword
|
||||
volumes:
|
||||
- ./etc/sonic/config.cfg:/etc/sonic.cfg
|
||||
- ./data:/var/lib/sonic/store/
|
||||
|
||||
# Optional Addons: tweak these examples as needed for your specific use case
|
||||
|
||||
|
@ -73,3 +87,4 @@ services:
|
|||
# volumes:
|
||||
# ./data:/archivebox
|
||||
# ./data/wayback:/webarchive
|
||||
|
||||
|
|
2
docs
|
@ -1 +1 @@
|
|||
Subproject commit d5071d92367a91bb585abb5da7c65ebc61d0d7b0
|
||||
Subproject commit 6228411cb63872fb88bc07a0f7be43b7f535337b
|
66
etc/sonic/config.cfg
Normal file
|
@ -0,0 +1,66 @@
|
|||
# Sonic
|
||||
# Fast, lightweight and schema-less search backend
|
||||
# Configuration file
|
||||
# Example: https://github.com/valeriansaliou/sonic/blob/master/config.cfg
|
||||
|
||||
|
||||
[server]
|
||||
|
||||
log_level = "info"
|
||||
|
||||
|
||||
[channel]
|
||||
|
||||
inet = "0.0.0.0:1491"
|
||||
tcp_timeout = 300
|
||||
|
||||
auth_password = "${env.SEARCH_BACKEND_PASSWORD}"
|
||||
|
||||
[channel.search]
|
||||
|
||||
query_limit_default = 65535
|
||||
query_limit_maximum = 65535
|
||||
query_alternates_try = 10
|
||||
|
||||
suggest_limit_default = 5
|
||||
suggest_limit_maximum = 20
|
||||
|
||||
|
||||
[store]
|
||||
|
||||
[store.kv]
|
||||
|
||||
path = "/var/lib/sonic/store/kv/"
|
||||
|
||||
retain_word_objects = 100000
|
||||
|
||||
[store.kv.pool]
|
||||
|
||||
inactive_after = 1800
|
||||
|
||||
[store.kv.database]
|
||||
|
||||
flush_after = 900
|
||||
|
||||
compress = true
|
||||
parallelism = 2
|
||||
max_files = 100
|
||||
max_compactions = 1
|
||||
max_flushes = 1
|
||||
write_buffer = 16384
|
||||
write_ahead_log = true
|
||||
|
||||
[store.fst]
|
||||
|
||||
path = "/var/lib/sonic/store/fst/"
|
||||
|
||||
[store.fst.pool]
|
||||
|
||||
inactive_after = 300
|
||||
|
||||
[store.fst.graph]
|
||||
|
||||
consolidate_after = 180
|
||||
|
||||
max_size = 2048
|
||||
max_words = 250000
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "archivebox",
|
||||
"version": "0.4.24",
|
||||
"version": "0.5.1",
|
||||
"description": "ArchiveBox: The self-hosted internet archive",
|
||||
"author": "Nick Sweeting <archivebox-npm@sweeting.me>",
|
||||
"license": "MIT",
|
||||
|
|