1
0
Fork 0
mirror of synced 2024-06-26 10:00:19 +12:00

Merge branch 'master' into node_config

This commit is contained in:
Nick Sweeting 2020-08-18 19:04:50 -04:00 committed by GitHub
commit 75deaf293d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
25 changed files with 224 additions and 154 deletions

19
.npmignore Normal file
View file

@ -0,0 +1,19 @@
tests/
archivebox/
archivebox.egg-info/
build/
dist/
docs/
etc/
.github
.gitmodules
.dockerignore
.flake8
CNAME
_config.yml
docker-compose.yaml
docker-compose.yml
Dockerfile
MANIFEST.in
Pipfile
setup.py

View file

@ -1,4 +1,4 @@
include LICENSE graft archivebox
include README.md global-exclude .DS_Store
include archivebox/VERSION global-exclude __pycache__
recursive-include archivebox/themes * global-exclude *.pyc

View file

@ -63,10 +63,10 @@ To get started, you can [install them manually](https://github.com/pirate/Archiv
```bash ```bash
# Docker # Docker
mkdir data && cd data mkdir data && cd data
docker run -v $PWD:/data nikisweeting/archivebox init docker run -v $PWD:/data -it nikisweeting/archivebox init
docker run -v $PWD:/data nikisweeting/archivebox add 'https://example.com' docker run -v $PWD:/data -it nikisweeting/archivebox add 'https://example.com'
docker run -v $PWD:/data -it nikisweeting/archivebox manage createsuperuser docker run -v $PWD:/data -it nikisweeting/archivebox manage createsuperuser
docker run -v $PWD:/data -p 8000:8000 nikisweeting/archivebox server 0.0.0.0:8000 docker run -v $PWD:/data -it -p 8000:8000 nikisweeting/archivebox server 0.0.0.0:8000
open http://127.0.0.1:8000 open http://127.0.0.1:8000
``` ```

View file

@ -1,6 +1,6 @@
Metadata-Version: 2.1 Metadata-Version: 2.1
Name: archivebox Name: archivebox
Version: 0.4.17 Version: 0.4.20
Summary: The self-hosted internet archive. Summary: The self-hosted internet archive.
Home-page: https://github.com/pirate/ArchiveBox Home-page: https://github.com/pirate/ArchiveBox
Author: Nick Sweeting Author: Nick Sweeting
@ -77,10 +77,10 @@ Description: <div align="center">
```bash ```bash
# Docker # Docker
mkdir data && cd data mkdir data && cd data
docker run -v $PWD:/data nikisweeting/archivebox init docker run -v $PWD:/data -it nikisweeting/archivebox init
docker run -v $PWD:/data nikisweeting/archivebox add 'https://example.com' docker run -v $PWD:/data -it nikisweeting/archivebox add 'https://example.com'
docker run -v $PWD:/data -it nikisweeting/archivebox manage createsuperuser docker run -v $PWD:/data -it nikisweeting/archivebox manage createsuperuser
docker run -v $PWD:/data -p 8000:8000 nikisweeting/archivebox server 0.0.0.0:8000 docker run -v $PWD:/data -it -p 8000:8000 nikisweeting/archivebox server 0.0.0.0:8000
open http://127.0.0.1:8000 open http://127.0.0.1:8000
``` ```

View file

@ -1,13 +1,16 @@
LICENSE
MANIFEST.in MANIFEST.in
README.md README.md
setup.py setup.py
archivebox/VERSION archivebox/.flake8
archivebox/LICENSE
archivebox/README.md
archivebox/__init__.py archivebox/__init__.py
archivebox/__main__.py archivebox/__main__.py
archivebox/logging_util.py archivebox/logging_util.py
archivebox/main.py archivebox/main.py
archivebox/manage.py archivebox/manage.py
archivebox/mypy.ini
archivebox/package.json
archivebox/system.py archivebox/system.py
archivebox/util.py archivebox/util.py
archivebox.egg-info/PKG-INFO archivebox.egg-info/PKG-INFO
@ -45,6 +48,7 @@ archivebox/core/urls.py
archivebox/core/views.py archivebox/core/views.py
archivebox/core/welcome_message.py archivebox/core/welcome_message.py
archivebox/core/wsgi.py archivebox/core/wsgi.py
archivebox/core/management/commands/archivebox.py
archivebox/core/migrations/0001_initial.py archivebox/core/migrations/0001_initial.py
archivebox/core/migrations/0002_auto_20200625_1521.py archivebox/core/migrations/0002_auto_20200625_1521.py
archivebox/core/migrations/0003_auto_20200630_1034.py archivebox/core/migrations/0003_auto_20200630_1034.py
@ -110,16 +114,4 @@ archivebox/themes/legacy/static/jquery.min.js
archivebox/themes/legacy/static/sort_asc.png archivebox/themes/legacy/static/sort_asc.png
archivebox/themes/legacy/static/sort_both.png archivebox/themes/legacy/static/sort_both.png
archivebox/themes/legacy/static/sort_desc.png archivebox/themes/legacy/static/sort_desc.png
archivebox/themes/legacy/static/spinner.gif archivebox/themes/legacy/static/spinner.gif
tests/__init__.py
tests/conftest.py
tests/fixtures.py
tests/test_args.py
tests/test_extractors.py
tests/test_init.py
tests/test_oneshot.py
tests/test_remove.py
tests/test_title.py
tests/test_util.py
tests/mock_server/__init__.py
tests/mock_server/server.py

View file

@ -1,2 +1 @@
archivebox archivebox
tests

1
archivebox/LICENSE Symbolic link
View file

@ -0,0 +1 @@
../LICENSE

1
archivebox/README.md Symbolic link
View file

@ -0,0 +1 @@
../README.md

View file

@ -1 +0,0 @@
0.4.17

View file

@ -104,11 +104,11 @@ def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided,
) )
command = parser.parse_args(args or ()) command = parser.parse_args(args or ())
if command.help or command.subcommand is None: if command.version:
command.subcommand = 'help'
elif command.version:
command.subcommand = 'version' command.subcommand = 'version'
elif command.help or command.subcommand is None:
command.subcommand = 'help'
if command.subcommand not in ('help', 'version', 'status'): if command.subcommand not in ('help', 'version', 'status'):
from ..logging_util import log_cli_command from ..logging_util import log_cli_command

View file

@ -4,10 +4,11 @@ import os
import io import io
import re import re
import sys import sys
import django import json
import getpass import getpass
import shutil import shutil
import platform import platform
import django
from hashlib import md5 from hashlib import md5
from pathlib import Path from pathlib import Path
@ -58,7 +59,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
'MEDIA_TIMEOUT': {'type': int, 'default': 3600}, 'MEDIA_TIMEOUT': {'type': int, 'default': 3600},
'OUTPUT_PERMISSIONS': {'type': str, 'default': '755'}, 'OUTPUT_PERMISSIONS': {'type': str, 'default': '755'},
'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'}, 'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'},
'URL_BLACKLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2)(\?.*)?$'}, # to avoid downloading code assets as their own pages 'URL_BLACKLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$'}, # to avoid downloading code assets as their own pages
}, },
'SERVER_CONFIG': { 'SERVER_CONFIG': {
@ -186,7 +187,6 @@ STATICFILE_EXTENSIONS = {
# html, htm, shtml, xhtml, xml, aspx, php, cgi # html, htm, shtml, xhtml, xml, aspx, php, cgi
} }
VERSION_FILENAME = 'VERSION'
PYTHON_DIR_NAME = 'archivebox' PYTHON_DIR_NAME = 'archivebox'
TEMPLATES_DIR_NAME = 'themes' TEMPLATES_DIR_NAME = 'themes'
@ -232,10 +232,10 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
'CONFIG_FILE': {'default': lambda c: os.path.abspath(os.path.expanduser(c['CONFIG_FILE'])) if c['CONFIG_FILE'] else os.path.join(c['OUTPUT_DIR'], CONFIG_FILENAME)}, 'CONFIG_FILE': {'default': lambda c: os.path.abspath(os.path.expanduser(c['CONFIG_FILE'])) if c['CONFIG_FILE'] else os.path.join(c['OUTPUT_DIR'], CONFIG_FILENAME)},
'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and os.path.abspath(os.path.expanduser(c['COOKIES_FILE']))}, 'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and os.path.abspath(os.path.expanduser(c['COOKIES_FILE']))},
'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (os.path.abspath(os.path.expanduser(c['CHROME_USER_DATA_DIR'])) or None)}, 'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (os.path.abspath(os.path.expanduser(c['CHROME_USER_DATA_DIR'])) or None)},
'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'], re.IGNORECASE | re.UNICODE | re.MULTILINE)}, 'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'] or '', re.IGNORECASE | re.UNICODE | re.MULTILINE)},
'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0]}, 'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0]},
'VERSION': {'default': lambda c: open(os.path.join(c['PYTHON_DIR'], VERSION_FILENAME), 'r').read().strip()}, 'VERSION': {'default': lambda c: json.loads((Path(c['PYTHON_DIR']) / 'package.json').read_text().strip())['version']},
'GIT_SHA': {'default': lambda c: c['VERSION'].split('+')[-1] or 'unknown'}, 'GIT_SHA': {'default': lambda c: c['VERSION'].split('+')[-1] or 'unknown'},
'PYTHON_BINARY': {'default': lambda c: sys.executable}, 'PYTHON_BINARY': {'default': lambda c: sys.executable},
@ -510,16 +510,9 @@ def bin_version(binary: Optional[str]) -> Optional[str]:
return None return None
try: try:
if binary.split('/')[-1] in ('single-file',): version_str = run([abspath, "--version"], stdout=PIPE).stdout.strip().decode()
# these dependencies dont support the --version flag, but are valid still # take first 3 columns of first line of version info
if run([abspath, "--help"], stdout=PIPE).returncode == 0: return ' '.join(version_str.split('\n')[0].strip().split()[:3])
return '0.0.0'
else:
return None
else:
version_str = run([abspath, "--version"], stdout=PIPE).stdout.strip().decode()
# take first 3 columns of first line of version info
return ' '.join(version_str.split('\n')[0].strip().split()[:3])
except OSError: except OSError:
pass pass
# stderr(f'[X] Unable to find working version of dependency: {binary}', color='red') # stderr(f'[X] Unable to find working version of dependency: {binary}', color='red')
@ -534,6 +527,10 @@ def bin_path(binary: Optional[str]) -> Optional[str]:
if binary is None: if binary is None:
return None return None
node_modules_bin = Path('.') / 'node_modules' / '.bin' / binary
if node_modules_bin.exists():
return str(node_modules_bin.resolve())
return shutil.which(os.path.expanduser(binary)) or binary return shutil.which(os.path.expanduser(binary)) or binary
def bin_hash(binary: Optional[str]) -> Optional[str]: def bin_hash(binary: Optional[str]) -> Optional[str]:
@ -784,6 +781,10 @@ globals().update(CONFIG)
# Timezone set as UTC # Timezone set as UTC
os.environ["TZ"] = 'UTC' os.environ["TZ"] = 'UTC'
# add ./node_modules/.bin to $PATH so we can use node scripts in extractors
NODE_BIN_PATH = str((Path(CONFIG["OUTPUT_DIR"]).absolute() / 'node_modules' / '.bin'))
sys.path.append(NODE_BIN_PATH)
############################## Importable Checkers ############################# ############################## Importable Checkers #############################
@ -825,16 +826,6 @@ def check_system_config(config: ConfigDict=CONFIG) -> None:
stderr(' CHROME_USER_DATA_DIR="{}"'.format(config['CHROME_USER_DATA_DIR'].split('/Default')[0])) stderr(' CHROME_USER_DATA_DIR="{}"'.format(config['CHROME_USER_DATA_DIR'].split('/Default')[0]))
raise SystemExit(2) raise SystemExit(2)
def print_dependency_additional_info(dependency: str) -> None:
if dependency == "SINGLEFILE_BINARY":
hint(('npm install -g git+https://github.com/gildas-lormeau/SingleFile.git"',
'or set SAVE_SINGLEFILE=False to silence this warning',
''))
if dependency == "READABILITY_BINARY":
hint(('npm install -g git+https://github.com/pirate/readability-extractor.git"',
'or set SAVE_READABILITY=False to silence this warning',
''))
def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None: def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
invalid_dependencies = [ invalid_dependencies = [
@ -851,9 +842,10 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
info['version'] or 'unable to detect version', info['version'] or 'unable to detect version',
) )
) )
print_dependency_additional_info(dependency) if dependency in ('SINGLEFILE_BINARY', 'READABILITY_BINARY'):
stderr(' {lightred}Hint:{reset} To get more info on dependencies run:'.format(**ANSI)) hint(('npm install --prefix . "git+https://github.com/pirate/ArchiveBox.git"',
stderr(' archivebox --version') f'or archivebox config --set SAVE_{dependency.rsplit("_", 1)[0]}=False to silence this warning',
''), prefix=' ')
stderr('') stderr('')
if config['TIMEOUT'] < 5: if config['TIMEOUT'] < 5:

View file

@ -31,15 +31,16 @@ class ConfigDict(BaseConfig, total=False):
SHOW_PROGRESS: bool SHOW_PROGRESS: bool
IN_DOCKER: bool IN_DOCKER: bool
OUTPUT_DIR: str OUTPUT_DIR: Optional[str]
CONFIG_FILE: str CONFIG_FILE: Optional[str]
ONLY_NEW: bool ONLY_NEW: bool
TIMEOUT: int TIMEOUT: int
MEDIA_TIMEOUT: int MEDIA_TIMEOUT: int
OUTPUT_PERMISSIONS: str OUTPUT_PERMISSIONS: str
URL_BLACKLIST: Optional[str] RESTRICT_FILE_NAMES: str
URL_BLACKLIST: str
SECRET_KEY: str SECRET_KEY: Optional[str]
BIND_ADDR: str BIND_ADDR: str
ALLOWED_HOSTS: str ALLOWED_HOSTS: str
DEBUG: bool DEBUG: bool
@ -52,10 +53,11 @@ class ConfigDict(BaseConfig, total=False):
SAVE_FAVICON: bool SAVE_FAVICON: bool
SAVE_WGET: bool SAVE_WGET: bool
SAVE_WGET_REQUISITES: bool SAVE_WGET_REQUISITES: bool
SAVE_SINGLEFILE: bool
SAVE_READABILITY: bool
SAVE_PDF: bool SAVE_PDF: bool
SAVE_SCREENSHOT: bool SAVE_SCREENSHOT: bool
SAVE_DOM: bool SAVE_DOM: bool
SAVE_SINGLEFILE: bool
SAVE_WARC: bool SAVE_WARC: bool
SAVE_GIT: bool SAVE_GIT: bool
SAVE_MEDIA: bool SAVE_MEDIA: bool
@ -75,53 +77,18 @@ class ConfigDict(BaseConfig, total=False):
USE_CURL: bool USE_CURL: bool
USE_WGET: bool USE_WGET: bool
USE_SINGLEFILE: bool
USE_READABILITY: bool
USE_GIT: bool USE_GIT: bool
USE_CHROME: bool USE_CHROME: bool
USE_YOUTUBEDL: bool USE_YOUTUBEDL: bool
USE_SINGLEFILE: bool CURL_BINARY: str
GIT_BINARY: str
CURL_BINARY: Optional[str] WGET_BINARY: str
GIT_BINARY: Optional[str] SINGLEFILE_BINARY: str
WGET_BINARY: Optional[str] READABILITY_BINARY: str
YOUTUBEDL_BINARY: Optional[str] YOUTUBEDL_BINARY: str
CHROME_BINARY: Optional[str] CHROME_BINARY: Optional[str]
SINGLEFILE_BINARY: Optional[str]
TERM_WIDTH: Callable[[], int]
USER: str
ANSI: Dict[str, str]
REPO_DIR: str
PYTHON_DIR: str
TEMPLATES_DIR: str
ARCHIVE_DIR: str
SOURCES_DIR: str
LOGS_DIR: str
URL_BLACKLIST_PTN: Optional[Pattern]
WGET_AUTO_COMPRESSION: bool
ARCHIVEBOX_BINARY: str
VERSION: str
GIT_SHA: str
PYTHON_BINARY: str
PYTHON_ENCODING: str
PYTHON_VERSION: str
DJANGO_BINARY: str
DJANGO_VERSION: str
CURL_VERSION: str
WGET_VERSION: str
YOUTUBEDL_VERSION: str
GIT_VERSION: str
CHROME_VERSION: str
DEPENDENCIES: Dict[str, SimpleConfigValueDict]
CODE_LOCATIONS: Dict[str, SimpleConfigValueDict]
CONFIG_LOCATIONS: Dict[str, SimpleConfigValueDict]
DATA_LOCATIONS: Dict[str, SimpleConfigValueDict]
CHROME_OPTIONS: Dict[str, SimpleConfigValue]
ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue] ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]

View file

@ -17,7 +17,7 @@ from ..util import (
from ..config import ( from ..config import (
TIMEOUT, TIMEOUT,
SAVE_READABILITY, SAVE_READABILITY,
READABILITY_BINARY, DEPENDENCIES,
READABILITY_VERSION, READABILITY_VERSION,
) )
from ..logging_util import TimedProgress from ..logging_util import TimedProgress
@ -73,7 +73,7 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
temp_doc.close() temp_doc.close()
cmd = [ cmd = [
READABILITY_BINARY, DEPENDENCIES['READABILITY_BINARY']['path'],
temp_doc.name temp_doc.name
] ]

View file

@ -15,7 +15,7 @@ from ..util import (
from ..config import ( from ..config import (
TIMEOUT, TIMEOUT,
SAVE_SINGLEFILE, SAVE_SINGLEFILE,
SINGLEFILE_BINARY, DEPENDENCIES,
SINGLEFILE_VERSION, SINGLEFILE_VERSION,
CHROME_BINARY, CHROME_BINARY,
) )
@ -43,7 +43,7 @@ def save_singlefile(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOU
# SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli # SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli
cmd = [ cmd = [
SINGLEFILE_BINARY, DEPENDENCIES['SINGLEFILE_BINARY']['path'],
'--browser-executable-path={}'.format(CHROME_BINARY), '--browser-executable-path={}'.format(CHROME_BINARY),
'--browser-args="{}"'.format(json.dumps(browser_args[1:])), '--browser-args="{}"'.format(json.dumps(browser_args[1:])),
link.url, link.url,

View file

@ -127,7 +127,7 @@ class Link:
def __str__(self) -> str: def __str__(self) -> str:
return f'[{self.timestamp}] {self.base_url} "{self.title}"' return f'[{self.timestamp}] {self.url} "{self.title}"'
def __post_init__(self): def __post_init__(self):
self.typecheck() self.typecheck()

View file

@ -99,15 +99,18 @@ class TimedProgress:
if self.SHOW_PROGRESS: if self.SHOW_PROGRESS:
# terminate if we havent already terminated # terminate if we havent already terminated
self.p.terminate()
self.p.join()
self.p.close()
# clear whole terminal line
try: try:
sys.stdout.write('\r{}{}\r'.format((' ' * TERM_WIDTH()), ANSI['reset'])) self.p.terminate()
except (IOError, BrokenPipeError): self.p.join()
# ignore when the parent proc has stopped listening to our stdout self.p.close()
# clear whole terminal line
try:
sys.stdout.write('\r{}{}\r'.format((' ' * TERM_WIDTH()), ANSI['reset']))
except (IOError, BrokenPipeError):
# ignore when the parent proc has stopped listening to our stdout
pass
except ValueError:
pass pass
@ -466,7 +469,10 @@ def printable_folders(folders: Dict[str, Optional["Link"]],
from .index.csv import links_to_csv from .index.csv import links_to_csv
return links_to_csv(folders.values(), cols=csv.split(','), header=True) return links_to_csv(folders.values(), cols=csv.split(','), header=True)
return '\n'.join(f'{folder} {link}' for folder, link in folders.items()) return '\n'.join(
f'{folder} {link and link.url} "{link and link.title}"'
for folder, link in folders.items()
)

View file

@ -114,6 +114,8 @@ ALLOWED_IN_OUTPUT_DIR = {
'venv', 'venv',
'virtualenv', 'virtualenv',
'.virtualenv', '.virtualenv',
'node_modules',
'package-lock.json',
ARCHIVE_DIR_NAME, ARCHIVE_DIR_NAME,
SOURCES_DIR_NAME, SOURCES_DIR_NAME,
LOGS_DIR_NAME, LOGS_DIR_NAME,

1
archivebox/package.json Symbolic link
View file

@ -0,0 +1 @@
../package.json

44
bin/build.sh Executable file
View file

@ -0,0 +1,44 @@
#!/usr/bin/env bash
### Bash Environment Setup
# http://redsymbol.net/articles/unofficial-bash-strict-mode/
# https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html
# set -o xtrace
set -o errexit
set -o errtrace
set -o nounset
set -o pipefail
IFS=$'\n'
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
source "$REPO_DIR/.venv/bin/activate"
cd "$REPO_DIR"
# echo "[*] Fetching latest docs version"
# cd "$REPO_DIR/docs"
# git pull
# cd "$REPO_DIR"
# echo "[+] Building docs"
# sphinx-apidoc -o docs archivebox
# cd "$REPO_DIR/docs"
# make html
# cd "$REPO_DIR"
echo "[*] Cleaning up build dirs"
cd "$REPO_DIR"
rm -Rf build dist archivebox.egg-info
echo "[+] Building sdist, bdist_egg, and bdist_wheel"
python3 setup.py sdist bdist_egg bdist_wheel
echo "[+] Building docker image in the background..."
docker build . -t archivebox \
-t archivebox:latest > /tmp/archivebox_docker_build.log 2>&1 &
ps "$!"
echo "[√] Done. Install the built package by running:"
echo " python3 setup.py install"
echo " # or"
echo " pip3 install ."

View file

@ -10,29 +10,28 @@ set -o nounset
set -o pipefail set -o pipefail
IFS=$'\n' IFS=$'\n'
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )" REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
VERSION_FILE="$DIR/archivebox/VERSION"
function bump_semver { function bump_semver {
echo "$1" | awk -F. '{$NF = $NF + 1;} 1' | sed 's/ /./g' echo "$1" | awk -F. '{$NF = $NF + 1;} 1' | sed 's/ /./g'
} }
source "$DIR/.venv/bin/activate" source "$REPO_DIR/.venv/bin/activate"
cd "$DIR" cd "$REPO_DIR"
OLD_VERSION="$(cat "$VERSION_FILE")" OLD_VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
NEW_VERSION="$(bump_semver "$OLD_VERSION")" NEW_VERSION="$(bump_semver "$OLD_VERSION")"
echo "[*] Fetching latest docs version" echo "[*] Fetching latest docs version"
cd "$DIR/docs" cd "$REPO_DIR/docs"
git pull git pull
cd "$DIR" cd "$REPO_DIR"
echo "[+] Building docs" echo "[+] Building docs"
sphinx-apidoc -o docs archivebox sphinx-apidoc -o docs archivebox
cd "$DIR/docs" cd "$REPO_DIR/docs"
make html make html
cd "$DIR" cd "$REPO_DIR"
if [ -z "$(git status --porcelain)" ] && [[ "$(git branch --show-current)" == "master" ]]; then if [ -z "$(git status --porcelain)" ] && [[ "$(git branch --show-current)" == "master" ]]; then
git pull git pull
@ -43,19 +42,21 @@ else
fi fi
echo "[*] Bumping VERSION from $OLD_VERSION to $NEW_VERSION" echo "[*] Bumping VERSION from $OLD_VERSION to $NEW_VERSION"
echo "$NEW_VERSION" > "$VERSION_FILE" contents="$(jq ".version = \"$NEW_VERSION\"" "$REPO_DIR/package.json")" && \
git add "$DIR/docs" echo "${contents}" > package.json
git add "$VERSION_FILE" git add "$REPO_DIR/docs"
git add "$REPO_DIR/package.json"
git add "$REPO_DIR/package-lock.json"
echo "[*] Cleaning up build dirs" echo "[*] Cleaning up build dirs"
cd "$DIR" cd "$REPO_DIR"
rm -Rf build dist rm -Rf build dist archivebox.egg-info
echo "[+] Building sdist and bdist_wheel" echo "[+] Building sdist and bdist_wheel"
python3 setup.py sdist bdist_wheel python3 setup.py sdist bdist_egg bdist_wheel
echo "[^] Pushing source to github" echo "[^] Pushing source to github"
git add "$DIR/archivebox.egg-info" git add "$REPO_DIR/archivebox.egg-info"
git commit -m "$NEW_VERSION release" git commit -m "$NEW_VERSION release"
git tag -a "v$NEW_VERSION" -m "v$NEW_VERSION" git tag -a "v$NEW_VERSION" -m "v$NEW_VERSION"
git push origin master git push origin master

View file

@ -31,7 +31,7 @@ services:
# host machine, add tasks and see more info with archivebox schedule --help # host machine, add tasks and see more info with archivebox schedule --help
# scheduler: # scheduler:
# image: nikisweeting/archivebox:latest # image: nikisweeting/archivebox:latest
# command: schedule --foreground --every=day --depth=1 'https://getpocket.com/users/USERNA<E/feed/all' # command: schedule --foreground --every=day --depth=1 'https://getpocket.com/users/USERNAME/feed/all'
# environment: # environment:
# - USE_COLOR=True # - USE_COLOR=True
# - SHOW_PROGRESS=False # - SHOW_PROGRESS=False

2
docs

@ -1 +1 @@
Subproject commit 101aec0bc1e98c1f7b1a42281a686a098ef9cdde Subproject commit 4a7052eb5000f179ece678c0e98eea3cb581c079

6
package-lock.json generated
View file

@ -1,6 +1,6 @@
{ {
"name": "archivebox", "name": "archivebox",
"version": "0.4.14", "version": "0.4.19",
"lockfileVersion": 1, "lockfileVersion": 1,
"requires": true, "requires": true,
"dependencies": { "dependencies": {
@ -902,7 +902,7 @@
"integrity": "sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA==" "integrity": "sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA=="
}, },
"readability-extractor": { "readability-extractor": {
"version": "git+https://github.com/pirate/readability-extractor.git#afa6a5bb8473f629ee3f1e0dcbf093b73d4eff40", "version": "git+https://github.com/pirate/readability-extractor.git#0098f142b0a015c8c90766d3b74d9eb6fb7b7e6a",
"from": "git+https://github.com/pirate/readability-extractor.git", "from": "git+https://github.com/pirate/readability-extractor.git",
"requires": { "requires": {
"@mozilla/readability": "^0.3.0", "@mozilla/readability": "^0.3.0",
@ -1054,7 +1054,7 @@
"integrity": "sha1-SysbJ+uAip+NzEgaWOXlb1mfP2E=" "integrity": "sha1-SysbJ+uAip+NzEgaWOXlb1mfP2E="
}, },
"single-file": { "single-file": {
"version": "git+https://github.com/gildas-lormeau/SingleFile.git#27c1ba673979f593b3c2c6cd353634bf869743f9", "version": "git+https://github.com/gildas-lormeau/SingleFile.git#e2e15381a6cbb9c3a6ca0ea8ff7307174e98ad12",
"from": "git+https://github.com/gildas-lormeau/SingleFile.git", "from": "git+https://github.com/gildas-lormeau/SingleFile.git",
"requires": { "requires": {
"file-url": "^3.0.0", "file-url": "^3.0.0",

View file

@ -1,6 +1,6 @@
{ {
"name": "archivebox", "name": "archivebox",
"version": "0.4.14", "version": "0.4.20",
"description": "ArchiveBox: The self-hosted internet archive", "description": "ArchiveBox: The self-hosted internet archive",
"author": "Nick Sweeting <archivebox-npm@sweeting.me>", "author": "Nick Sweeting <archivebox-npm@sweeting.me>",
"license": "MIT", "license": "MIT",
@ -8,7 +8,9 @@
"archivebox": "./bin/archive" "archivebox": "./bin/archive"
}, },
"bin": { "bin": {
"archivebox": "./bin/archive" "archivebox-node": "./bin/archive",
"single-file": "./node_modules/.bin/single-file",
"readability-extractor": "./node_modules/.bin/readability-extractor"
}, },
"dependencies": { "dependencies": {
"readability-extractor": "git+https://github.com/pirate/readability-extractor.git", "readability-extractor": "git+https://github.com/pirate/readability-extractor.git",

View file

@ -1,18 +1,57 @@
# import sys
import json
import setuptools import setuptools
from pathlib import Path from pathlib import Path
# from subprocess import check_call
# from setuptools.command.install import install
# from setuptools.command.develop import develop
# from setuptools.command.egg_info import egg_info
PKG_NAME = "archivebox" PKG_NAME = "archivebox"
REPO_URL = "https://github.com/pirate/ArchiveBox" REPO_URL = "https://github.com/pirate/ArchiveBox"
BASE_DIR = Path(__file__).parent.resolve() REPO_DIR = Path(__file__).parent.resolve()
SOURCE_DIR = BASE_DIR / PKG_NAME PYTHON_DIR = REPO_DIR / PKG_NAME
README = (BASE_DIR / "README.md").read_text() README = (PYTHON_DIR / "README.md").read_text()
VERSION = (SOURCE_DIR / "VERSION").read_text().strip() VERSION = json.loads((PYTHON_DIR / "package.json").read_text().strip())['version']
# To see when setup.py gets called (uncomment for debugging):
# To see when setup.py gets called (uncomment for debugging)
# import sys # import sys
# print(SOURCE_DIR, f" (v{VERSION})") # print(PYTHON_DIR, f" (v{VERSION})")
# print('>', sys.executable, *sys.argv) # print('>', sys.executable, *sys.argv)
# raise SystemExit(0)
# Sketchy way to install npm dependencies as a pip post-install script
# def setup_js():
# if sys.platform.lower() not in ('darwin', 'linux'):
# sys.stderr.write('[!] Warning: ArchiveBox is not officially supported on this platform.\n')
# sys.stderr.write(f'[+] Installing ArchiveBox npm package (PYTHON_DIR={PYTHON_DIR})...\n')
# try:
# check_call(f'npm install -g "{REPO_DIR}"', shell=True)
# sys.stderr.write('[√] Automatically installed npm dependencies.\n')
# except Exception as err:
# sys.stderr.write(f'[!] Failed to auto-install npm dependencies: {err}\n')
# sys.stderr.write(' Install NPM/npm using your system package manager, then run:\n')
# sys.stderr.write(' npm install -g "git+https://github.com/pirate/ArchiveBox.git\n')
# class CustomInstallCommand(install):
# def run(self):
# super().run()
# setup_js()
# class CustomDevelopCommand(develop):
# def run(self):
# super().run()
# setup_js()
# class CustomEggInfoCommand(egg_info):
# def run(self):
# super().run()
# setup_js()
setuptools.setup( setuptools.setup(
name=PKG_NAME, name=PKG_NAME,
@ -72,13 +111,18 @@ setuptools.setup(
# 'redis': ['redis', 'django-redis'], # 'redis': ['redis', 'django-redis'],
# 'pywb': ['pywb', 'redis'], # 'pywb': ['pywb', 'redis'],
}, },
packages=setuptools.find_packages(), packages=['archivebox'],
include_package_data=True, # see MANIFEST.in
entry_points={ entry_points={
"console_scripts": [ "console_scripts": [
f"{PKG_NAME} = {PKG_NAME}.cli:main", f"{PKG_NAME} = {PKG_NAME}.cli:main",
], ],
}, },
include_package_data=True, # cmdclass={
# 'install': CustomInstallCommand,
# 'develop': CustomDevelopCommand,
# 'egg_info': CustomEggInfoCommand,
# },
classifiers=[ classifiers=[
"License :: OSI Approved :: MIT License", "License :: OSI Approved :: MIT License",
"Natural Language :: English", "Natural Language :: English",