diff --git a/.npmignore b/.npmignore new file mode 100644 index 00000000..53fae0a8 --- /dev/null +++ b/.npmignore @@ -0,0 +1,19 @@ +tests/ +archivebox/ +archivebox.egg-info/ +build/ +dist/ +docs/ +etc/ +.github +.gitmodules +.dockerignore +.flake8 +CNAME +_config.yml +docker-compose.yaml +docker-compose.yml +Dockerfile +MANIFEST.in +Pipfile +setup.py diff --git a/MANIFEST.in b/MANIFEST.in index a73ef711..c9ae1535 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ -include LICENSE -include README.md -include archivebox/VERSION -recursive-include archivebox/themes * +graft archivebox +global-exclude .DS_Store +global-exclude __pycache__ +global-exclude *.pyc diff --git a/README.md b/README.md index 13159664..4850c9bb 100644 --- a/README.md +++ b/README.md @@ -63,10 +63,10 @@ To get started, you can [install them manually](https://github.com/pirate/Archiv ```bash # Docker mkdir data && cd data -docker run -v $PWD:/data nikisweeting/archivebox init -docker run -v $PWD:/data nikisweeting/archivebox add 'https://example.com' +docker run -v $PWD:/data -it nikisweeting/archivebox init +docker run -v $PWD:/data -it nikisweeting/archivebox add 'https://example.com' docker run -v $PWD:/data -it nikisweeting/archivebox manage createsuperuser -docker run -v $PWD:/data -p 8000:8000 nikisweeting/archivebox server 0.0.0.0:8000 +docker run -v $PWD:/data -it -p 8000:8000 nikisweeting/archivebox server 0.0.0.0:8000 open http://127.0.0.1:8000 ``` diff --git a/archivebox.egg-info/PKG-INFO b/archivebox.egg-info/PKG-INFO index ba659878..a7301407 100644 --- a/archivebox.egg-info/PKG-INFO +++ b/archivebox.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: archivebox -Version: 0.4.17 +Version: 0.4.20 Summary: The self-hosted internet archive. Home-page: https://github.com/pirate/ArchiveBox Author: Nick Sweeting @@ -77,10 +77,10 @@ Description:
```bash # Docker mkdir data && cd data - docker run -v $PWD:/data nikisweeting/archivebox init - docker run -v $PWD:/data nikisweeting/archivebox add 'https://example.com' + docker run -v $PWD:/data -it nikisweeting/archivebox init + docker run -v $PWD:/data -it nikisweeting/archivebox add 'https://example.com' docker run -v $PWD:/data -it nikisweeting/archivebox manage createsuperuser - docker run -v $PWD:/data -p 8000:8000 nikisweeting/archivebox server 0.0.0.0:8000 + docker run -v $PWD:/data -it -p 8000:8000 nikisweeting/archivebox server 0.0.0.0:8000 open http://127.0.0.1:8000 ``` diff --git a/archivebox.egg-info/SOURCES.txt b/archivebox.egg-info/SOURCES.txt index ee6a2fc5..d186b2fb 100644 --- a/archivebox.egg-info/SOURCES.txt +++ b/archivebox.egg-info/SOURCES.txt @@ -1,13 +1,16 @@ -LICENSE MANIFEST.in README.md setup.py -archivebox/VERSION +archivebox/.flake8 +archivebox/LICENSE +archivebox/README.md archivebox/__init__.py archivebox/__main__.py archivebox/logging_util.py archivebox/main.py archivebox/manage.py +archivebox/mypy.ini +archivebox/package.json archivebox/system.py archivebox/util.py archivebox.egg-info/PKG-INFO @@ -45,6 +48,7 @@ archivebox/core/urls.py archivebox/core/views.py archivebox/core/welcome_message.py archivebox/core/wsgi.py +archivebox/core/management/commands/archivebox.py archivebox/core/migrations/0001_initial.py archivebox/core/migrations/0002_auto_20200625_1521.py archivebox/core/migrations/0003_auto_20200630_1034.py @@ -110,16 +114,4 @@ archivebox/themes/legacy/static/jquery.min.js archivebox/themes/legacy/static/sort_asc.png archivebox/themes/legacy/static/sort_both.png archivebox/themes/legacy/static/sort_desc.png -archivebox/themes/legacy/static/spinner.gif -tests/__init__.py -tests/conftest.py -tests/fixtures.py -tests/test_args.py -tests/test_extractors.py -tests/test_init.py -tests/test_oneshot.py -tests/test_remove.py -tests/test_title.py -tests/test_util.py -tests/mock_server/__init__.py -tests/mock_server/server.py \ No newline at end of file +archivebox/themes/legacy/static/spinner.gif \ No newline at end of file diff --git a/archivebox.egg-info/top_level.txt b/archivebox.egg-info/top_level.txt index 9c6a6349..74056b65 100644 --- a/archivebox.egg-info/top_level.txt +++ b/archivebox.egg-info/top_level.txt @@ -1,2 +1 @@ archivebox -tests diff --git a/archivebox/LICENSE b/archivebox/LICENSE new file mode 120000 index 00000000..ea5b6064 --- /dev/null +++ b/archivebox/LICENSE @@ -0,0 +1 @@ +../LICENSE \ No newline at end of file diff --git a/archivebox/README.md b/archivebox/README.md new file mode 120000 index 00000000..32d46ee8 --- /dev/null +++ b/archivebox/README.md @@ -0,0 +1 @@ +../README.md \ No newline at end of file diff --git a/archivebox/VERSION b/archivebox/VERSION deleted file mode 100644 index 7040b811..00000000 --- a/archivebox/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.4.17 diff --git a/archivebox/cli/__init__.py b/archivebox/cli/__init__.py index 70a6866e..aa26715b 100644 --- a/archivebox/cli/__init__.py +++ b/archivebox/cli/__init__.py @@ -104,11 +104,11 @@ def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided, ) command = parser.parse_args(args or ()) - if command.help or command.subcommand is None: - command.subcommand = 'help' - elif command.version: + if command.version: command.subcommand = 'version' - + elif command.help or command.subcommand is None: + command.subcommand = 'help' + if command.subcommand not in ('help', 'version', 'status'): from ..logging_util import log_cli_command diff --git a/archivebox/config/__init__.py b/archivebox/config/__init__.py index 021939e0..b22dfbfe 100644 --- a/archivebox/config/__init__.py +++ b/archivebox/config/__init__.py @@ -4,10 +4,11 @@ import os import io import re import sys -import django +import json import getpass import shutil import platform +import django from hashlib import md5 from pathlib import Path @@ -58,7 +59,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = { 'MEDIA_TIMEOUT': {'type': int, 'default': 3600}, 'OUTPUT_PERMISSIONS': {'type': str, 'default': '755'}, 'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'}, - 'URL_BLACKLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2)(\?.*)?$'}, # to avoid downloading code assets as their own pages + 'URL_BLACKLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$'}, # to avoid downloading code assets as their own pages }, 'SERVER_CONFIG': { @@ -186,7 +187,6 @@ STATICFILE_EXTENSIONS = { # html, htm, shtml, xhtml, xml, aspx, php, cgi } -VERSION_FILENAME = 'VERSION' PYTHON_DIR_NAME = 'archivebox' TEMPLATES_DIR_NAME = 'themes' @@ -232,10 +232,10 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = { 'CONFIG_FILE': {'default': lambda c: os.path.abspath(os.path.expanduser(c['CONFIG_FILE'])) if c['CONFIG_FILE'] else os.path.join(c['OUTPUT_DIR'], CONFIG_FILENAME)}, 'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and os.path.abspath(os.path.expanduser(c['COOKIES_FILE']))}, 'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (os.path.abspath(os.path.expanduser(c['CHROME_USER_DATA_DIR'])) or None)}, - 'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'], re.IGNORECASE | re.UNICODE | re.MULTILINE)}, + 'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'] or '', re.IGNORECASE | re.UNICODE | re.MULTILINE)}, 'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0]}, - 'VERSION': {'default': lambda c: open(os.path.join(c['PYTHON_DIR'], VERSION_FILENAME), 'r').read().strip()}, + 'VERSION': {'default': lambda c: json.loads((Path(c['PYTHON_DIR']) / 'package.json').read_text().strip())['version']}, 'GIT_SHA': {'default': lambda c: c['VERSION'].split('+')[-1] or 'unknown'}, 'PYTHON_BINARY': {'default': lambda c: sys.executable}, @@ -510,16 +510,9 @@ def bin_version(binary: Optional[str]) -> Optional[str]: return None try: - if binary.split('/')[-1] in ('single-file',): - # these dependencies dont support the --version flag, but are valid still - if run([abspath, "--help"], stdout=PIPE).returncode == 0: - return '0.0.0' - else: - return None - else: - version_str = run([abspath, "--version"], stdout=PIPE).stdout.strip().decode() - # take first 3 columns of first line of version info - return ' '.join(version_str.split('\n')[0].strip().split()[:3]) + version_str = run([abspath, "--version"], stdout=PIPE).stdout.strip().decode() + # take first 3 columns of first line of version info + return ' '.join(version_str.split('\n')[0].strip().split()[:3]) except OSError: pass # stderr(f'[X] Unable to find working version of dependency: {binary}', color='red') @@ -534,6 +527,10 @@ def bin_path(binary: Optional[str]) -> Optional[str]: if binary is None: return None + node_modules_bin = Path('.') / 'node_modules' / '.bin' / binary + if node_modules_bin.exists(): + return str(node_modules_bin.resolve()) + return shutil.which(os.path.expanduser(binary)) or binary def bin_hash(binary: Optional[str]) -> Optional[str]: @@ -784,6 +781,10 @@ globals().update(CONFIG) # Timezone set as UTC os.environ["TZ"] = 'UTC' +# add ./node_modules/.bin to $PATH so we can use node scripts in extractors +NODE_BIN_PATH = str((Path(CONFIG["OUTPUT_DIR"]).absolute() / 'node_modules' / '.bin')) +sys.path.append(NODE_BIN_PATH) + ############################## Importable Checkers ############################# @@ -825,16 +826,6 @@ def check_system_config(config: ConfigDict=CONFIG) -> None: stderr(' CHROME_USER_DATA_DIR="{}"'.format(config['CHROME_USER_DATA_DIR'].split('/Default')[0])) raise SystemExit(2) -def print_dependency_additional_info(dependency: str) -> None: - if dependency == "SINGLEFILE_BINARY": - hint(('npm install -g git+https://github.com/gildas-lormeau/SingleFile.git"', - 'or set SAVE_SINGLEFILE=False to silence this warning', - '')) - if dependency == "READABILITY_BINARY": - hint(('npm install -g git+https://github.com/pirate/readability-extractor.git"', - 'or set SAVE_READABILITY=False to silence this warning', - '')) - def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None: invalid_dependencies = [ @@ -851,9 +842,10 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None: info['version'] or 'unable to detect version', ) ) - print_dependency_additional_info(dependency) - stderr(' {lightred}Hint:{reset} To get more info on dependencies run:'.format(**ANSI)) - stderr(' archivebox --version') + if dependency in ('SINGLEFILE_BINARY', 'READABILITY_BINARY'): + hint(('npm install --prefix . "git+https://github.com/pirate/ArchiveBox.git"', + f'or archivebox config --set SAVE_{dependency.rsplit("_", 1)[0]}=False to silence this warning', + ''), prefix=' ') stderr('') if config['TIMEOUT'] < 5: diff --git a/archivebox/config/stubs.py b/archivebox/config/stubs.py index 0e984624..542691ae 100644 --- a/archivebox/config/stubs.py +++ b/archivebox/config/stubs.py @@ -31,15 +31,16 @@ class ConfigDict(BaseConfig, total=False): SHOW_PROGRESS: bool IN_DOCKER: bool - OUTPUT_DIR: str - CONFIG_FILE: str + OUTPUT_DIR: Optional[str] + CONFIG_FILE: Optional[str] ONLY_NEW: bool TIMEOUT: int MEDIA_TIMEOUT: int OUTPUT_PERMISSIONS: str - URL_BLACKLIST: Optional[str] + RESTRICT_FILE_NAMES: str + URL_BLACKLIST: str - SECRET_KEY: str + SECRET_KEY: Optional[str] BIND_ADDR: str ALLOWED_HOSTS: str DEBUG: bool @@ -52,10 +53,11 @@ class ConfigDict(BaseConfig, total=False): SAVE_FAVICON: bool SAVE_WGET: bool SAVE_WGET_REQUISITES: bool + SAVE_SINGLEFILE: bool + SAVE_READABILITY: bool SAVE_PDF: bool SAVE_SCREENSHOT: bool SAVE_DOM: bool - SAVE_SINGLEFILE: bool SAVE_WARC: bool SAVE_GIT: bool SAVE_MEDIA: bool @@ -75,53 +77,18 @@ class ConfigDict(BaseConfig, total=False): USE_CURL: bool USE_WGET: bool + USE_SINGLEFILE: bool + USE_READABILITY: bool USE_GIT: bool USE_CHROME: bool USE_YOUTUBEDL: bool - USE_SINGLEFILE: bool - - CURL_BINARY: Optional[str] - GIT_BINARY: Optional[str] - WGET_BINARY: Optional[str] - YOUTUBEDL_BINARY: Optional[str] + CURL_BINARY: str + GIT_BINARY: str + WGET_BINARY: str + SINGLEFILE_BINARY: str + READABILITY_BINARY: str + YOUTUBEDL_BINARY: str CHROME_BINARY: Optional[str] - SINGLEFILE_BINARY: Optional[str] - - TERM_WIDTH: Callable[[], int] - USER: str - ANSI: Dict[str, str] - REPO_DIR: str - PYTHON_DIR: str - TEMPLATES_DIR: str - ARCHIVE_DIR: str - SOURCES_DIR: str - LOGS_DIR: str - - URL_BLACKLIST_PTN: Optional[Pattern] - WGET_AUTO_COMPRESSION: bool - - ARCHIVEBOX_BINARY: str - VERSION: str - GIT_SHA: str - - PYTHON_BINARY: str - PYTHON_ENCODING: str - PYTHON_VERSION: str - - DJANGO_BINARY: str - DJANGO_VERSION: str - - CURL_VERSION: str - WGET_VERSION: str - YOUTUBEDL_VERSION: str - GIT_VERSION: str - CHROME_VERSION: str - - DEPENDENCIES: Dict[str, SimpleConfigValueDict] - CODE_LOCATIONS: Dict[str, SimpleConfigValueDict] - CONFIG_LOCATIONS: Dict[str, SimpleConfigValueDict] - DATA_LOCATIONS: Dict[str, SimpleConfigValueDict] - CHROME_OPTIONS: Dict[str, SimpleConfigValue] ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue] diff --git a/archivebox/extractors/readability.py b/archivebox/extractors/readability.py index f181160d..219402b5 100644 --- a/archivebox/extractors/readability.py +++ b/archivebox/extractors/readability.py @@ -17,7 +17,7 @@ from ..util import ( from ..config import ( TIMEOUT, SAVE_READABILITY, - READABILITY_BINARY, + DEPENDENCIES, READABILITY_VERSION, ) from ..logging_util import TimedProgress @@ -73,7 +73,7 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO temp_doc.close() cmd = [ - READABILITY_BINARY, + DEPENDENCIES['READABILITY_BINARY']['path'], temp_doc.name ] diff --git a/archivebox/extractors/singlefile.py b/archivebox/extractors/singlefile.py index 87e7d5fd..702e44a0 100644 --- a/archivebox/extractors/singlefile.py +++ b/archivebox/extractors/singlefile.py @@ -15,7 +15,7 @@ from ..util import ( from ..config import ( TIMEOUT, SAVE_SINGLEFILE, - SINGLEFILE_BINARY, + DEPENDENCIES, SINGLEFILE_VERSION, CHROME_BINARY, ) @@ -43,7 +43,7 @@ def save_singlefile(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOU # SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli cmd = [ - SINGLEFILE_BINARY, + DEPENDENCIES['SINGLEFILE_BINARY']['path'], '--browser-executable-path={}'.format(CHROME_BINARY), '--browser-args="{}"'.format(json.dumps(browser_args[1:])), link.url, diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py index c3b6ce8c..d6ab601f 100644 --- a/archivebox/index/schema.py +++ b/archivebox/index/schema.py @@ -127,7 +127,7 @@ class Link: def __str__(self) -> str: - return f'[{self.timestamp}] {self.base_url} "{self.title}"' + return f'[{self.timestamp}] {self.url} "{self.title}"' def __post_init__(self): self.typecheck() diff --git a/archivebox/logging_util.py b/archivebox/logging_util.py index 71a50c56..b10fc10b 100644 --- a/archivebox/logging_util.py +++ b/archivebox/logging_util.py @@ -99,15 +99,18 @@ class TimedProgress: if self.SHOW_PROGRESS: # terminate if we havent already terminated - self.p.terminate() - self.p.join() - self.p.close() - - # clear whole terminal line try: - sys.stdout.write('\r{}{}\r'.format((' ' * TERM_WIDTH()), ANSI['reset'])) - except (IOError, BrokenPipeError): - # ignore when the parent proc has stopped listening to our stdout + self.p.terminate() + self.p.join() + self.p.close() + + # clear whole terminal line + try: + sys.stdout.write('\r{}{}\r'.format((' ' * TERM_WIDTH()), ANSI['reset'])) + except (IOError, BrokenPipeError): + # ignore when the parent proc has stopped listening to our stdout + pass + except ValueError: pass @@ -466,7 +469,10 @@ def printable_folders(folders: Dict[str, Optional["Link"]], from .index.csv import links_to_csv return links_to_csv(folders.values(), cols=csv.split(','), header=True) - return '\n'.join(f'{folder} {link}' for folder, link in folders.items()) + return '\n'.join( + f'{folder} {link and link.url} "{link and link.title}"' + for folder, link in folders.items() + ) diff --git a/archivebox/main.py b/archivebox/main.py index b65c6e64..3958405c 100644 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -114,6 +114,8 @@ ALLOWED_IN_OUTPUT_DIR = { 'venv', 'virtualenv', '.virtualenv', + 'node_modules', + 'package-lock.json', ARCHIVE_DIR_NAME, SOURCES_DIR_NAME, LOGS_DIR_NAME, diff --git a/archivebox/package.json b/archivebox/package.json new file mode 120000 index 00000000..4e26811d --- /dev/null +++ b/archivebox/package.json @@ -0,0 +1 @@ +../package.json \ No newline at end of file diff --git a/bin/build.sh b/bin/build.sh new file mode 100755 index 00000000..5df6721a --- /dev/null +++ b/bin/build.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash + +### Bash Environment Setup +# http://redsymbol.net/articles/unofficial-bash-strict-mode/ +# https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html +# set -o xtrace +set -o errexit +set -o errtrace +set -o nounset +set -o pipefail +IFS=$'\n' + +REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )" + +source "$REPO_DIR/.venv/bin/activate" +cd "$REPO_DIR" + +# echo "[*] Fetching latest docs version" +# cd "$REPO_DIR/docs" +# git pull +# cd "$REPO_DIR" + +# echo "[+] Building docs" +# sphinx-apidoc -o docs archivebox +# cd "$REPO_DIR/docs" +# make html +# cd "$REPO_DIR" + +echo "[*] Cleaning up build dirs" +cd "$REPO_DIR" +rm -Rf build dist archivebox.egg-info + +echo "[+] Building sdist, bdist_egg, and bdist_wheel" +python3 setup.py sdist bdist_egg bdist_wheel + +echo "[+] Building docker image in the background..." +docker build . -t archivebox \ + -t archivebox:latest > /tmp/archivebox_docker_build.log 2>&1 & +ps "$!" + +echo "[√] Done. Install the built package by running:" +echo " python3 setup.py install" +echo " # or" +echo " pip3 install ." diff --git a/bin/release.sh b/bin/release.sh index 7f5a7db4..16cde4d3 100755 --- a/bin/release.sh +++ b/bin/release.sh @@ -10,29 +10,28 @@ set -o nounset set -o pipefail IFS=$'\n' -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )" -VERSION_FILE="$DIR/archivebox/VERSION" +REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )" function bump_semver { echo "$1" | awk -F. '{$NF = $NF + 1;} 1' | sed 's/ /./g' } -source "$DIR/.venv/bin/activate" -cd "$DIR" +source "$REPO_DIR/.venv/bin/activate" +cd "$REPO_DIR" -OLD_VERSION="$(cat "$VERSION_FILE")" +OLD_VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")" NEW_VERSION="$(bump_semver "$OLD_VERSION")" echo "[*] Fetching latest docs version" -cd "$DIR/docs" +cd "$REPO_DIR/docs" git pull -cd "$DIR" +cd "$REPO_DIR" echo "[+] Building docs" sphinx-apidoc -o docs archivebox -cd "$DIR/docs" +cd "$REPO_DIR/docs" make html -cd "$DIR" +cd "$REPO_DIR" if [ -z "$(git status --porcelain)" ] && [[ "$(git branch --show-current)" == "master" ]]; then git pull @@ -43,19 +42,21 @@ else fi echo "[*] Bumping VERSION from $OLD_VERSION to $NEW_VERSION" -echo "$NEW_VERSION" > "$VERSION_FILE" -git add "$DIR/docs" -git add "$VERSION_FILE" +contents="$(jq ".version = \"$NEW_VERSION\"" "$REPO_DIR/package.json")" && \ +echo "${contents}" > package.json +git add "$REPO_DIR/docs" +git add "$REPO_DIR/package.json" +git add "$REPO_DIR/package-lock.json" echo "[*] Cleaning up build dirs" -cd "$DIR" -rm -Rf build dist +cd "$REPO_DIR" +rm -Rf build dist archivebox.egg-info echo "[+] Building sdist and bdist_wheel" -python3 setup.py sdist bdist_wheel +python3 setup.py sdist bdist_egg bdist_wheel echo "[^] Pushing source to github" -git add "$DIR/archivebox.egg-info" +git add "$REPO_DIR/archivebox.egg-info" git commit -m "$NEW_VERSION release" git tag -a "v$NEW_VERSION" -m "v$NEW_VERSION" git push origin master diff --git a/docker-compose.yml b/docker-compose.yml index c8733e1b..a209e959 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -31,7 +31,7 @@ services: # host machine, add tasks and see more info with archivebox schedule --help # scheduler: # image: nikisweeting/archivebox:latest - # command: schedule --foreground --every=day --depth=1 'https://getpocket.com/users/USERNA", "license": "MIT", @@ -8,7 +8,9 @@ "archivebox": "./bin/archive" }, "bin": { - "archivebox": "./bin/archive" + "archivebox-node": "./bin/archive", + "single-file": "./node_modules/.bin/single-file", + "readability-extractor": "./node_modules/.bin/readability-extractor" }, "dependencies": { "readability-extractor": "git+https://github.com/pirate/readability-extractor.git", diff --git a/setup.py b/setup.py index e4794f00..db83e9bf 100755 --- a/setup.py +++ b/setup.py @@ -1,18 +1,57 @@ +# import sys +import json import setuptools + from pathlib import Path +# from subprocess import check_call +# from setuptools.command.install import install +# from setuptools.command.develop import develop +# from setuptools.command.egg_info import egg_info + PKG_NAME = "archivebox" REPO_URL = "https://github.com/pirate/ArchiveBox" -BASE_DIR = Path(__file__).parent.resolve() -SOURCE_DIR = BASE_DIR / PKG_NAME -README = (BASE_DIR / "README.md").read_text() -VERSION = (SOURCE_DIR / "VERSION").read_text().strip() +REPO_DIR = Path(__file__).parent.resolve() +PYTHON_DIR = REPO_DIR / PKG_NAME +README = (PYTHON_DIR / "README.md").read_text() +VERSION = json.loads((PYTHON_DIR / "package.json").read_text().strip())['version'] + +# To see when setup.py gets called (uncomment for debugging): -# To see when setup.py gets called (uncomment for debugging) # import sys -# print(SOURCE_DIR, f" (v{VERSION})") +# print(PYTHON_DIR, f" (v{VERSION})") # print('>', sys.executable, *sys.argv) -# raise SystemExit(0) + +# Sketchy way to install npm dependencies as a pip post-install script + +# def setup_js(): +# if sys.platform.lower() not in ('darwin', 'linux'): +# sys.stderr.write('[!] Warning: ArchiveBox is not officially supported on this platform.\n') + +# sys.stderr.write(f'[+] Installing ArchiveBox npm package (PYTHON_DIR={PYTHON_DIR})...\n') +# try: +# check_call(f'npm install -g "{REPO_DIR}"', shell=True) +# sys.stderr.write('[√] Automatically installed npm dependencies.\n') +# except Exception as err: +# sys.stderr.write(f'[!] Failed to auto-install npm dependencies: {err}\n') +# sys.stderr.write(' Install NPM/npm using your system package manager, then run:\n') +# sys.stderr.write(' npm install -g "git+https://github.com/pirate/ArchiveBox.git\n') + + +# class CustomInstallCommand(install): +# def run(self): +# super().run() +# setup_js() + +# class CustomDevelopCommand(develop): +# def run(self): +# super().run() +# setup_js() + +# class CustomEggInfoCommand(egg_info): +# def run(self): +# super().run() +# setup_js() setuptools.setup( name=PKG_NAME, @@ -72,13 +111,18 @@ setuptools.setup( # 'redis': ['redis', 'django-redis'], # 'pywb': ['pywb', 'redis'], }, - packages=setuptools.find_packages(), + packages=['archivebox'], + include_package_data=True, # see MANIFEST.in entry_points={ "console_scripts": [ f"{PKG_NAME} = {PKG_NAME}.cli:main", ], }, - include_package_data=True, + # cmdclass={ + # 'install': CustomInstallCommand, + # 'develop': CustomDevelopCommand, + # 'egg_info': CustomEggInfoCommand, + # }, classifiers=[ "License :: OSI Approved :: MIT License", "Natural Language :: English",