diff --git a/.npmignore b/.npmignore
new file mode 100644
index 00000000..53fae0a8
--- /dev/null
+++ b/.npmignore
@@ -0,0 +1,19 @@
+tests/
+archivebox/
+archivebox.egg-info/
+build/
+dist/
+docs/
+etc/
+.github
+.gitmodules
+.dockerignore
+.flake8
+CNAME
+_config.yml
+docker-compose.yaml
+docker-compose.yml
+Dockerfile
+MANIFEST.in
+Pipfile
+setup.py
diff --git a/MANIFEST.in b/MANIFEST.in
index a73ef711..c9ae1535 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,4 @@
-include LICENSE
-include README.md
-include archivebox/VERSION
-recursive-include archivebox/themes *
+graft archivebox
+global-exclude .DS_Store
+global-exclude __pycache__
+global-exclude *.pyc
diff --git a/README.md b/README.md
index 13159664..4850c9bb 100644
--- a/README.md
+++ b/README.md
@@ -63,10 +63,10 @@ To get started, you can [install them manually](https://github.com/pirate/Archiv
```bash
# Docker
mkdir data && cd data
-docker run -v $PWD:/data nikisweeting/archivebox init
-docker run -v $PWD:/data nikisweeting/archivebox add 'https://example.com'
+docker run -v $PWD:/data -it nikisweeting/archivebox init
+docker run -v $PWD:/data -it nikisweeting/archivebox add 'https://example.com'
docker run -v $PWD:/data -it nikisweeting/archivebox manage createsuperuser
-docker run -v $PWD:/data -p 8000:8000 nikisweeting/archivebox server 0.0.0.0:8000
+docker run -v $PWD:/data -it -p 8000:8000 nikisweeting/archivebox server 0.0.0.0:8000
open http://127.0.0.1:8000
```
diff --git a/archivebox.egg-info/PKG-INFO b/archivebox.egg-info/PKG-INFO
index ba659878..a7301407 100644
--- a/archivebox.egg-info/PKG-INFO
+++ b/archivebox.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: archivebox
-Version: 0.4.17
+Version: 0.4.20
Summary: The self-hosted internet archive.
Home-page: https://github.com/pirate/ArchiveBox
Author: Nick Sweeting
@@ -77,10 +77,10 @@ Description:
```bash
# Docker
mkdir data && cd data
- docker run -v $PWD:/data nikisweeting/archivebox init
- docker run -v $PWD:/data nikisweeting/archivebox add 'https://example.com'
+ docker run -v $PWD:/data -it nikisweeting/archivebox init
+ docker run -v $PWD:/data -it nikisweeting/archivebox add 'https://example.com'
docker run -v $PWD:/data -it nikisweeting/archivebox manage createsuperuser
- docker run -v $PWD:/data -p 8000:8000 nikisweeting/archivebox server 0.0.0.0:8000
+ docker run -v $PWD:/data -it -p 8000:8000 nikisweeting/archivebox server 0.0.0.0:8000
open http://127.0.0.1:8000
```
diff --git a/archivebox.egg-info/SOURCES.txt b/archivebox.egg-info/SOURCES.txt
index ee6a2fc5..d186b2fb 100644
--- a/archivebox.egg-info/SOURCES.txt
+++ b/archivebox.egg-info/SOURCES.txt
@@ -1,13 +1,16 @@
-LICENSE
MANIFEST.in
README.md
setup.py
-archivebox/VERSION
+archivebox/.flake8
+archivebox/LICENSE
+archivebox/README.md
archivebox/__init__.py
archivebox/__main__.py
archivebox/logging_util.py
archivebox/main.py
archivebox/manage.py
+archivebox/mypy.ini
+archivebox/package.json
archivebox/system.py
archivebox/util.py
archivebox.egg-info/PKG-INFO
@@ -45,6 +48,7 @@ archivebox/core/urls.py
archivebox/core/views.py
archivebox/core/welcome_message.py
archivebox/core/wsgi.py
+archivebox/core/management/commands/archivebox.py
archivebox/core/migrations/0001_initial.py
archivebox/core/migrations/0002_auto_20200625_1521.py
archivebox/core/migrations/0003_auto_20200630_1034.py
@@ -110,16 +114,4 @@ archivebox/themes/legacy/static/jquery.min.js
archivebox/themes/legacy/static/sort_asc.png
archivebox/themes/legacy/static/sort_both.png
archivebox/themes/legacy/static/sort_desc.png
-archivebox/themes/legacy/static/spinner.gif
-tests/__init__.py
-tests/conftest.py
-tests/fixtures.py
-tests/test_args.py
-tests/test_extractors.py
-tests/test_init.py
-tests/test_oneshot.py
-tests/test_remove.py
-tests/test_title.py
-tests/test_util.py
-tests/mock_server/__init__.py
-tests/mock_server/server.py
\ No newline at end of file
+archivebox/themes/legacy/static/spinner.gif
\ No newline at end of file
diff --git a/archivebox.egg-info/top_level.txt b/archivebox.egg-info/top_level.txt
index 9c6a6349..74056b65 100644
--- a/archivebox.egg-info/top_level.txt
+++ b/archivebox.egg-info/top_level.txt
@@ -1,2 +1 @@
archivebox
-tests
diff --git a/archivebox/LICENSE b/archivebox/LICENSE
new file mode 120000
index 00000000..ea5b6064
--- /dev/null
+++ b/archivebox/LICENSE
@@ -0,0 +1 @@
+../LICENSE
\ No newline at end of file
diff --git a/archivebox/README.md b/archivebox/README.md
new file mode 120000
index 00000000..32d46ee8
--- /dev/null
+++ b/archivebox/README.md
@@ -0,0 +1 @@
+../README.md
\ No newline at end of file
diff --git a/archivebox/VERSION b/archivebox/VERSION
deleted file mode 100644
index 7040b811..00000000
--- a/archivebox/VERSION
+++ /dev/null
@@ -1 +0,0 @@
-0.4.17
diff --git a/archivebox/cli/__init__.py b/archivebox/cli/__init__.py
index 70a6866e..aa26715b 100644
--- a/archivebox/cli/__init__.py
+++ b/archivebox/cli/__init__.py
@@ -104,11 +104,11 @@ def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided,
)
command = parser.parse_args(args or ())
- if command.help or command.subcommand is None:
- command.subcommand = 'help'
- elif command.version:
+ if command.version:
command.subcommand = 'version'
-
+ elif command.help or command.subcommand is None:
+ command.subcommand = 'help'
+
if command.subcommand not in ('help', 'version', 'status'):
from ..logging_util import log_cli_command
diff --git a/archivebox/config/__init__.py b/archivebox/config/__init__.py
index 021939e0..b22dfbfe 100644
--- a/archivebox/config/__init__.py
+++ b/archivebox/config/__init__.py
@@ -4,10 +4,11 @@ import os
import io
import re
import sys
-import django
+import json
import getpass
import shutil
import platform
+import django
from hashlib import md5
from pathlib import Path
@@ -58,7 +59,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
'MEDIA_TIMEOUT': {'type': int, 'default': 3600},
'OUTPUT_PERMISSIONS': {'type': str, 'default': '755'},
'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'},
- 'URL_BLACKLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2)(\?.*)?$'}, # to avoid downloading code assets as their own pages
+ 'URL_BLACKLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$'}, # to avoid downloading code assets as their own pages
},
'SERVER_CONFIG': {
@@ -186,7 +187,6 @@ STATICFILE_EXTENSIONS = {
# html, htm, shtml, xhtml, xml, aspx, php, cgi
}
-VERSION_FILENAME = 'VERSION'
PYTHON_DIR_NAME = 'archivebox'
TEMPLATES_DIR_NAME = 'themes'
@@ -232,10 +232,10 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
'CONFIG_FILE': {'default': lambda c: os.path.abspath(os.path.expanduser(c['CONFIG_FILE'])) if c['CONFIG_FILE'] else os.path.join(c['OUTPUT_DIR'], CONFIG_FILENAME)},
'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and os.path.abspath(os.path.expanduser(c['COOKIES_FILE']))},
'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (os.path.abspath(os.path.expanduser(c['CHROME_USER_DATA_DIR'])) or None)},
- 'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'], re.IGNORECASE | re.UNICODE | re.MULTILINE)},
+ 'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'] or '', re.IGNORECASE | re.UNICODE | re.MULTILINE)},
'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0]},
- 'VERSION': {'default': lambda c: open(os.path.join(c['PYTHON_DIR'], VERSION_FILENAME), 'r').read().strip()},
+ 'VERSION': {'default': lambda c: json.loads((Path(c['PYTHON_DIR']) / 'package.json').read_text().strip())['version']},
'GIT_SHA': {'default': lambda c: c['VERSION'].split('+')[-1] or 'unknown'},
'PYTHON_BINARY': {'default': lambda c: sys.executable},
@@ -510,16 +510,9 @@ def bin_version(binary: Optional[str]) -> Optional[str]:
return None
try:
- if binary.split('/')[-1] in ('single-file',):
- # these dependencies dont support the --version flag, but are valid still
- if run([abspath, "--help"], stdout=PIPE).returncode == 0:
- return '0.0.0'
- else:
- return None
- else:
- version_str = run([abspath, "--version"], stdout=PIPE).stdout.strip().decode()
- # take first 3 columns of first line of version info
- return ' '.join(version_str.split('\n')[0].strip().split()[:3])
+ version_str = run([abspath, "--version"], stdout=PIPE).stdout.strip().decode()
+ # take first 3 columns of first line of version info
+ return ' '.join(version_str.split('\n')[0].strip().split()[:3])
except OSError:
pass
# stderr(f'[X] Unable to find working version of dependency: {binary}', color='red')
@@ -534,6 +527,10 @@ def bin_path(binary: Optional[str]) -> Optional[str]:
if binary is None:
return None
+ node_modules_bin = Path('.') / 'node_modules' / '.bin' / binary
+ if node_modules_bin.exists():
+ return str(node_modules_bin.resolve())
+
return shutil.which(os.path.expanduser(binary)) or binary
def bin_hash(binary: Optional[str]) -> Optional[str]:
@@ -784,6 +781,10 @@ globals().update(CONFIG)
# Timezone set as UTC
os.environ["TZ"] = 'UTC'
+# add ./node_modules/.bin to $PATH so subprocesses can find the node scripts used by extractors
+NODE_BIN_PATH = str((Path(CONFIG["OUTPUT_DIR"]).absolute() / 'node_modules' / '.bin'))
+os.environ["PATH"] = os.pathsep.join(filter(None, (os.environ.get("PATH"), NODE_BIN_PATH)))  # sys.path only affects Python imports, not executable lookup
+
############################## Importable Checkers #############################
@@ -825,16 +826,6 @@ def check_system_config(config: ConfigDict=CONFIG) -> None:
stderr(' CHROME_USER_DATA_DIR="{}"'.format(config['CHROME_USER_DATA_DIR'].split('/Default')[0]))
raise SystemExit(2)
-def print_dependency_additional_info(dependency: str) -> None:
- if dependency == "SINGLEFILE_BINARY":
- hint(('npm install -g git+https://github.com/gildas-lormeau/SingleFile.git"',
- 'or set SAVE_SINGLEFILE=False to silence this warning',
- ''))
- if dependency == "READABILITY_BINARY":
- hint(('npm install -g git+https://github.com/pirate/readability-extractor.git"',
- 'or set SAVE_READABILITY=False to silence this warning',
- ''))
-
def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
invalid_dependencies = [
@@ -851,9 +842,10 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
info['version'] or 'unable to detect version',
)
)
- print_dependency_additional_info(dependency)
- stderr(' {lightred}Hint:{reset} To get more info on dependencies run:'.format(**ANSI))
- stderr(' archivebox --version')
+ if dependency in ('SINGLEFILE_BINARY', 'READABILITY_BINARY'):
+ hint(('npm install --prefix . "git+https://github.com/pirate/ArchiveBox.git"',
+ f'or archivebox config --set SAVE_{dependency.rsplit("_", 1)[0]}=False to silence this warning',
+ ''), prefix=' ')
stderr('')
if config['TIMEOUT'] < 5:
diff --git a/archivebox/config/stubs.py b/archivebox/config/stubs.py
index 0e984624..542691ae 100644
--- a/archivebox/config/stubs.py
+++ b/archivebox/config/stubs.py
@@ -31,15 +31,16 @@ class ConfigDict(BaseConfig, total=False):
SHOW_PROGRESS: bool
IN_DOCKER: bool
- OUTPUT_DIR: str
- CONFIG_FILE: str
+ OUTPUT_DIR: Optional[str]
+ CONFIG_FILE: Optional[str]
ONLY_NEW: bool
TIMEOUT: int
MEDIA_TIMEOUT: int
OUTPUT_PERMISSIONS: str
- URL_BLACKLIST: Optional[str]
+ RESTRICT_FILE_NAMES: str
+ URL_BLACKLIST: str
- SECRET_KEY: str
+ SECRET_KEY: Optional[str]
BIND_ADDR: str
ALLOWED_HOSTS: str
DEBUG: bool
@@ -52,10 +53,11 @@ class ConfigDict(BaseConfig, total=False):
SAVE_FAVICON: bool
SAVE_WGET: bool
SAVE_WGET_REQUISITES: bool
+ SAVE_SINGLEFILE: bool
+ SAVE_READABILITY: bool
SAVE_PDF: bool
SAVE_SCREENSHOT: bool
SAVE_DOM: bool
- SAVE_SINGLEFILE: bool
SAVE_WARC: bool
SAVE_GIT: bool
SAVE_MEDIA: bool
@@ -75,53 +77,18 @@ class ConfigDict(BaseConfig, total=False):
USE_CURL: bool
USE_WGET: bool
+ USE_SINGLEFILE: bool
+ USE_READABILITY: bool
USE_GIT: bool
USE_CHROME: bool
USE_YOUTUBEDL: bool
- USE_SINGLEFILE: bool
-
- CURL_BINARY: Optional[str]
- GIT_BINARY: Optional[str]
- WGET_BINARY: Optional[str]
- YOUTUBEDL_BINARY: Optional[str]
+ CURL_BINARY: str
+ GIT_BINARY: str
+ WGET_BINARY: str
+ SINGLEFILE_BINARY: str
+ READABILITY_BINARY: str
+ YOUTUBEDL_BINARY: str
CHROME_BINARY: Optional[str]
- SINGLEFILE_BINARY: Optional[str]
-
- TERM_WIDTH: Callable[[], int]
- USER: str
- ANSI: Dict[str, str]
- REPO_DIR: str
- PYTHON_DIR: str
- TEMPLATES_DIR: str
- ARCHIVE_DIR: str
- SOURCES_DIR: str
- LOGS_DIR: str
-
- URL_BLACKLIST_PTN: Optional[Pattern]
- WGET_AUTO_COMPRESSION: bool
-
- ARCHIVEBOX_BINARY: str
- VERSION: str
- GIT_SHA: str
-
- PYTHON_BINARY: str
- PYTHON_ENCODING: str
- PYTHON_VERSION: str
-
- DJANGO_BINARY: str
- DJANGO_VERSION: str
-
- CURL_VERSION: str
- WGET_VERSION: str
- YOUTUBEDL_VERSION: str
- GIT_VERSION: str
- CHROME_VERSION: str
-
- DEPENDENCIES: Dict[str, SimpleConfigValueDict]
- CODE_LOCATIONS: Dict[str, SimpleConfigValueDict]
- CONFIG_LOCATIONS: Dict[str, SimpleConfigValueDict]
- DATA_LOCATIONS: Dict[str, SimpleConfigValueDict]
- CHROME_OPTIONS: Dict[str, SimpleConfigValue]
ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]
diff --git a/archivebox/extractors/readability.py b/archivebox/extractors/readability.py
index f181160d..219402b5 100644
--- a/archivebox/extractors/readability.py
+++ b/archivebox/extractors/readability.py
@@ -17,7 +17,7 @@ from ..util import (
from ..config import (
TIMEOUT,
SAVE_READABILITY,
- READABILITY_BINARY,
+ DEPENDENCIES,
READABILITY_VERSION,
)
from ..logging_util import TimedProgress
@@ -73,7 +73,7 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
temp_doc.close()
cmd = [
- READABILITY_BINARY,
+ DEPENDENCIES['READABILITY_BINARY']['path'],
temp_doc.name
]
diff --git a/archivebox/extractors/singlefile.py b/archivebox/extractors/singlefile.py
index 87e7d5fd..702e44a0 100644
--- a/archivebox/extractors/singlefile.py
+++ b/archivebox/extractors/singlefile.py
@@ -15,7 +15,7 @@ from ..util import (
from ..config import (
TIMEOUT,
SAVE_SINGLEFILE,
- SINGLEFILE_BINARY,
+ DEPENDENCIES,
SINGLEFILE_VERSION,
CHROME_BINARY,
)
@@ -43,7 +43,7 @@ def save_singlefile(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOU
# SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli
cmd = [
- SINGLEFILE_BINARY,
+ DEPENDENCIES['SINGLEFILE_BINARY']['path'],
'--browser-executable-path={}'.format(CHROME_BINARY),
'--browser-args="{}"'.format(json.dumps(browser_args[1:])),
link.url,
diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py
index c3b6ce8c..d6ab601f 100644
--- a/archivebox/index/schema.py
+++ b/archivebox/index/schema.py
@@ -127,7 +127,7 @@ class Link:
def __str__(self) -> str:
- return f'[{self.timestamp}] {self.base_url} "{self.title}"'
+ return f'[{self.timestamp}] {self.url} "{self.title}"'
def __post_init__(self):
self.typecheck()
diff --git a/archivebox/logging_util.py b/archivebox/logging_util.py
index 71a50c56..b10fc10b 100644
--- a/archivebox/logging_util.py
+++ b/archivebox/logging_util.py
@@ -99,15 +99,18 @@ class TimedProgress:
if self.SHOW_PROGRESS:
# terminate if we havent already terminated
- self.p.terminate()
- self.p.join()
- self.p.close()
-
- # clear whole terminal line
try:
- sys.stdout.write('\r{}{}\r'.format((' ' * TERM_WIDTH()), ANSI['reset']))
- except (IOError, BrokenPipeError):
- # ignore when the parent proc has stopped listening to our stdout
+ self.p.terminate()
+ self.p.join()
+ self.p.close()
+
+ # clear whole terminal line
+ try:
+ sys.stdout.write('\r{}{}\r'.format((' ' * TERM_WIDTH()), ANSI['reset']))
+ except (IOError, BrokenPipeError):
+ # ignore when the parent proc has stopped listening to our stdout
+ pass
+ except ValueError:
pass
@@ -466,7 +469,10 @@ def printable_folders(folders: Dict[str, Optional["Link"]],
from .index.csv import links_to_csv
return links_to_csv(folders.values(), cols=csv.split(','), header=True)
- return '\n'.join(f'{folder} {link}' for folder, link in folders.items())
+ return '\n'.join(
+ f'{folder} {link and link.url} "{link and link.title}"'
+ for folder, link in folders.items()
+ )
diff --git a/archivebox/main.py b/archivebox/main.py
index b65c6e64..3958405c 100644
--- a/archivebox/main.py
+++ b/archivebox/main.py
@@ -114,6 +114,8 @@ ALLOWED_IN_OUTPUT_DIR = {
'venv',
'virtualenv',
'.virtualenv',
+ 'node_modules',
+ 'package-lock.json',
ARCHIVE_DIR_NAME,
SOURCES_DIR_NAME,
LOGS_DIR_NAME,
diff --git a/archivebox/package.json b/archivebox/package.json
new file mode 120000
index 00000000..4e26811d
--- /dev/null
+++ b/archivebox/package.json
@@ -0,0 +1 @@
+../package.json
\ No newline at end of file
diff --git a/bin/build.sh b/bin/build.sh
new file mode 100755
index 00000000..5df6721a
--- /dev/null
+++ b/bin/build.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+# Builds the ArchiveBox Python distributions (sdist, egg, wheel) and starts a docker image build.
+### Bash Environment Setup
+# http://redsymbol.net/articles/unofficial-bash-strict-mode/
+# https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html
+# set -o xtrace
+set -o errexit
+set -o errtrace
+set -o nounset
+set -o pipefail
+IFS=$'\n'
+# Repo root = parent of the directory that contains this script
+REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
+# Activate the repo-local virtualenv and run everything from the repo root
+source "$REPO_DIR/.venv/bin/activate"
+cd "$REPO_DIR"
+
+# echo "[*] Fetching latest docs version"
+# cd "$REPO_DIR/docs"
+# git pull
+# cd "$REPO_DIR"
+
+# echo "[+] Building docs"
+# sphinx-apidoc -o docs archivebox
+# cd "$REPO_DIR/docs"
+# make html
+# cd "$REPO_DIR"
+
+echo "[*] Cleaning up build dirs"
+cd "$REPO_DIR"
+rm -Rf build dist archivebox.egg-info
+
+echo "[+] Building sdist, bdist_egg, and bdist_wheel"
+python3 setup.py sdist bdist_egg bdist_wheel
+# The docker build below runs in the background; its output goes to /tmp/archivebox_docker_build.log
+echo "[+] Building docker image in the background..."
+docker build . -t archivebox \
+ -t archivebox:latest > /tmp/archivebox_docker_build.log 2>&1 &
+ps "$!"  # show the background docker build process ($! = PID of the last background job)
+
+echo "[√] Done. Install the built package by running:"
+echo " python3 setup.py install"
+echo " # or"
+echo " pip3 install ."
diff --git a/bin/release.sh b/bin/release.sh
index 7f5a7db4..16cde4d3 100755
--- a/bin/release.sh
+++ b/bin/release.sh
@@ -10,29 +10,28 @@ set -o nounset
set -o pipefail
IFS=$'\n'
-DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
-VERSION_FILE="$DIR/archivebox/VERSION"
+REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
function bump_semver {
echo "$1" | awk -F. '{$NF = $NF + 1;} 1' | sed 's/ /./g'
}
-source "$DIR/.venv/bin/activate"
-cd "$DIR"
+source "$REPO_DIR/.venv/bin/activate"
+cd "$REPO_DIR"
-OLD_VERSION="$(cat "$VERSION_FILE")"
+OLD_VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
NEW_VERSION="$(bump_semver "$OLD_VERSION")"
echo "[*] Fetching latest docs version"
-cd "$DIR/docs"
+cd "$REPO_DIR/docs"
git pull
-cd "$DIR"
+cd "$REPO_DIR"
echo "[+] Building docs"
sphinx-apidoc -o docs archivebox
-cd "$DIR/docs"
+cd "$REPO_DIR/docs"
make html
-cd "$DIR"
+cd "$REPO_DIR"
if [ -z "$(git status --porcelain)" ] && [[ "$(git branch --show-current)" == "master" ]]; then
git pull
@@ -43,19 +42,21 @@ else
fi
echo "[*] Bumping VERSION from $OLD_VERSION to $NEW_VERSION"
-echo "$NEW_VERSION" > "$VERSION_FILE"
-git add "$DIR/docs"
-git add "$VERSION_FILE"
+contents="$(jq --arg version "$NEW_VERSION" '.version = $version' "$REPO_DIR/package.json")"  # standalone statement: errexit aborts the release if jq fails (it would not inside an && list)
+printf '%s\n' "${contents}" > "$REPO_DIR/package.json"  # write back to the same absolute path that was read
+git add "$REPO_DIR/docs"
+git add "$REPO_DIR/package.json"
+git add "$REPO_DIR/package-lock.json"
echo "[*] Cleaning up build dirs"
-cd "$DIR"
-rm -Rf build dist
+cd "$REPO_DIR"
+rm -Rf build dist archivebox.egg-info
echo "[+] Building sdist and bdist_wheel"
-python3 setup.py sdist bdist_wheel
+python3 setup.py sdist bdist_egg bdist_wheel
echo "[^] Pushing source to github"
-git add "$DIR/archivebox.egg-info"
+git add "$REPO_DIR/archivebox.egg-info"
git commit -m "$NEW_VERSION release"
git tag -a "v$NEW_VERSION" -m "v$NEW_VERSION"
git push origin master
diff --git a/docker-compose.yml b/docker-compose.yml
index c8733e1b..a209e959 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -31,7 +31,7 @@ services:
# host machine, add tasks and see more info with archivebox schedule --help
# scheduler:
# image: nikisweeting/archivebox:latest
- # command: schedule --foreground --every=day --depth=1 'https://getpocket.com/users/USERNA",
"license": "MIT",
@@ -8,7 +8,9 @@
"archivebox": "./bin/archive"
},
"bin": {
- "archivebox": "./bin/archive"
+ "archivebox-node": "./bin/archive",
+ "single-file": "./node_modules/.bin/single-file",
+ "readability-extractor": "./node_modules/.bin/readability-extractor"
},
"dependencies": {
"readability-extractor": "git+https://github.com/pirate/readability-extractor.git",
diff --git a/setup.py b/setup.py
index e4794f00..db83e9bf 100755
--- a/setup.py
+++ b/setup.py
@@ -1,18 +1,57 @@
+# import sys
+import json
import setuptools
+
from pathlib import Path
+# from subprocess import check_call
+# from setuptools.command.install import install
+# from setuptools.command.develop import develop
+# from setuptools.command.egg_info import egg_info
+
PKG_NAME = "archivebox"
REPO_URL = "https://github.com/pirate/ArchiveBox"
-BASE_DIR = Path(__file__).parent.resolve()
-SOURCE_DIR = BASE_DIR / PKG_NAME
-README = (BASE_DIR / "README.md").read_text()
-VERSION = (SOURCE_DIR / "VERSION").read_text().strip()
+REPO_DIR = Path(__file__).parent.resolve()
+PYTHON_DIR = REPO_DIR / PKG_NAME
+README = (PYTHON_DIR / "README.md").read_text()
+VERSION = json.loads((PYTHON_DIR / "package.json").read_text().strip())['version']
+
+# To see when setup.py gets called (uncomment for debugging):
-# To see when setup.py gets called (uncomment for debugging)
# import sys
-# print(SOURCE_DIR, f" (v{VERSION})")
+# print(PYTHON_DIR, f" (v{VERSION})")
# print('>', sys.executable, *sys.argv)
-# raise SystemExit(0)
+
+# Sketchy way to install npm dependencies as a pip post-install script
+
+# def setup_js():
+# if sys.platform.lower() not in ('darwin', 'linux'):
+# sys.stderr.write('[!] Warning: ArchiveBox is not officially supported on this platform.\n')
+
+# sys.stderr.write(f'[+] Installing ArchiveBox npm package (PYTHON_DIR={PYTHON_DIR})...\n')
+# try:
+# check_call(f'npm install -g "{REPO_DIR}"', shell=True)
+# sys.stderr.write('[√] Automatically installed npm dependencies.\n')
+# except Exception as err:
+# sys.stderr.write(f'[!] Failed to auto-install npm dependencies: {err}\n')
+# sys.stderr.write(' Install NPM/npm using your system package manager, then run:\n')
+# sys.stderr.write(' npm install -g "git+https://github.com/pirate/ArchiveBox.git"\n')
+
+
+# class CustomInstallCommand(install):
+# def run(self):
+# super().run()
+# setup_js()
+
+# class CustomDevelopCommand(develop):
+# def run(self):
+# super().run()
+# setup_js()
+
+# class CustomEggInfoCommand(egg_info):
+# def run(self):
+# super().run()
+# setup_js()
setuptools.setup(
name=PKG_NAME,
@@ -72,13 +111,18 @@ setuptools.setup(
# 'redis': ['redis', 'django-redis'],
# 'pywb': ['pywb', 'redis'],
},
- packages=setuptools.find_packages(),
+ packages=['archivebox'],
+ include_package_data=True, # see MANIFEST.in
entry_points={
"console_scripts": [
f"{PKG_NAME} = {PKG_NAME}.cli:main",
],
},
- include_package_data=True,
+ # cmdclass={
+ # 'install': CustomInstallCommand,
+ # 'develop': CustomDevelopCommand,
+ # 'egg_info': CustomEggInfoCommand,
+ # },
classifiers=[
"License :: OSI Approved :: MIT License",
"Natural Language :: English",