diff --git a/VERSION b/VERSION index 0d91a54c..1d0ba9ea 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.3.0 +0.4.0 diff --git a/archivebox/__init__.py b/archivebox/__init__.py index ab53f570..e69de29b 100644 --- a/archivebox/__init__.py +++ b/archivebox/__init__.py @@ -1,5 +0,0 @@ - - -#__name__ = 'archivebox' -#__package__ = 'archivebox' - diff --git a/archivebox/__main__.py b/archivebox/__main__.py new file mode 100755 index 00000000..8e75ec40 --- /dev/null +++ b/archivebox/__main__.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 + +""" +Main ArchiveBox command line application entrypoint. +""" + +__package__ = 'archivebox' + +import os +import sys + +PYTHON_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(PYTHON_DIR) + +from .env import * +from .legacy.archive import main + + +if __name__ == '__main__': + main(sys.argv) + diff --git a/archivebox/archivebox/VERSION b/archivebox/archivebox/VERSION deleted file mode 120000 index 6ff19de4..00000000 --- a/archivebox/archivebox/VERSION +++ /dev/null @@ -1 +0,0 @@ -../VERSION \ No newline at end of file diff --git a/archivebox/archivebox/__init__.py b/archivebox/archivebox/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/archivebox/archivebox/settings.py b/archivebox/archivebox/settings.py deleted file mode 100644 index e027de02..00000000 --- a/archivebox/archivebox/settings.py +++ /dev/null @@ -1,123 +0,0 @@ -""" -Django settings for archivebox project. - -Generated by 'django-admin startproject' using Django 2.1.7. - -For more information on this file, see -https://docs.djangoproject.com/en/2.1/topics/settings/ - -For the full list of settings and their values, see -https://docs.djangoproject.com/en/2.1/ref/settings/ -""" - -import os - -# Build paths inside the project like this: os.path.join(COLLECTION_DIR, ...) -REPO_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..')) -COLLECTION_DIR = os.path.abspath(os.curdir) - -print(REPO_DIR) -print(COLLECTION_DIR) -raise SystemExit(0) - - -# Quick-start development settings - unsuitable for production -# See https://docs.djangoproject.com/en/2.1/howto/deployment/checklist/ - -# SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = 'm-ma!-z^0b5w4%**le#ig!7-d@h($t02q*96h*-ua+$lm9bvao' - -# SECURITY WARNING: don't run with debug turned on in production! -DEBUG = True - -ALLOWED_HOSTS = [] - - -# Application definition - -INSTALLED_APPS = [ - 'django.contrib.admin', - 'django.contrib.auth', - 'django.contrib.contenttypes', - 'django.contrib.sessions', - 'django.contrib.messages', - 'django.contrib.staticfiles', - - 'core', -] - -MIDDLEWARE = [ - 'django.middleware.security.SecurityMiddleware', - 'django.contrib.sessions.middleware.SessionMiddleware', - 'django.middleware.common.CommonMiddleware', - 'django.middleware.csrf.CsrfViewMiddleware', - 'django.contrib.auth.middleware.AuthenticationMiddleware', - 'django.contrib.messages.middleware.MessageMiddleware', - 'django.middleware.clickjacking.XFrameOptionsMiddleware', -] - -ROOT_URLCONF = 'archivebox.urls' - -ACTIVE_THEME = 'default' -TEMPLATES_DIR = os.path.join(REPO_DIR, 'themes', ACTIVE_THEME) -TEMPLATES = [ - { - 'BACKEND': 'django.template.backends.django.DjangoTemplates', - 'DIRS': [TEMPLATES_DIR], - 'APP_DIRS': True, - 'OPTIONS': { - 'context_processors': [ - 'django.template.context_processors.debug', - 'django.template.context_processors.request', - 'django.contrib.auth.context_processors.auth', - 'django.contrib.messages.context_processors.messages', - ], - }, - }, -] - -WSGI_APPLICATION = 'archivebox.wsgi.application' - - -# Database -# https://docs.djangoproject.com/en/2.1/ref/settings/#databases - -DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.sqlite3', - 'NAME': os.path.join(COLLECTION_DIR, 'database.sqlite3'), - } -} - - -# Password validation -# https://docs.djangoproject.com/en/2.1/ref/settings/#auth-password-validators - -AUTH_PASSWORD_VALIDATORS = [ - { - 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', - }, - { - 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', - }, - { - 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', - }, - { - 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', - }, -] - - -# Internationalization -# https://docs.djangoproject.com/en/2.1/topics/i18n/ -LANGUAGE_CODE = 'en-us' -TIME_ZONE = 'UTC' -USE_I18N = True -USE_L10N = True -USE_TZ = True - - -# Static files (CSS, JavaScript, Images) -# https://docs.djangoproject.com/en/2.1/howto/static-files/ -STATIC_URL = '/static/' diff --git a/archivebox/core/management/commands/archivebox.py b/archivebox/core/management/commands/archivebox.py index 1764e4e2..c3c236e5 100644 --- a/archivebox/core/management/commands/archivebox.py +++ b/archivebox/core/management/commands/archivebox.py @@ -1,10 +1,11 @@ from django.core.management.base import BaseCommand -from core.archive import main +from legacy.archive import main + class Command(BaseCommand): help = 'ArchiveBox test.bee' def handle(self, *args, **kwargs): - main() + main(*args) diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py new file mode 100644 index 00000000..0f209b4c --- /dev/null +++ b/archivebox/core/settings.py @@ -0,0 +1,78 @@ +import os + +from legacy.config import ( + REPO_DIR, + OUTPUT_DIR, + TEMPLATES_DIR, + DATABASE_DIR, +) + + +SECRET_KEY = '---------------- not a valid secret key ! ----------------' +DEBUG = True + + +INSTALLED_APPS = [ + 'django.contrib.admin', + 'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.sessions', + 'django.contrib.messages', + 'django.contrib.staticfiles', + + 'core', +] + +MIDDLEWARE = [ + 'django.middleware.security.SecurityMiddleware', + 'django.contrib.sessions.middleware.SessionMiddleware', + 'django.middleware.common.CommonMiddleware', + 'django.middleware.csrf.CsrfViewMiddleware', + 'django.contrib.auth.middleware.AuthenticationMiddleware', + 'django.contrib.messages.middleware.MessageMiddleware', + 'django.middleware.clickjacking.XFrameOptionsMiddleware', +] + +ROOT_URLCONF = 'core.urls' +TEMPLATES = [ + { + 'BACKEND': 'django.template.backends.django.DjangoTemplates', + 'DIRS': [TEMPLATES_DIR], + 'APP_DIRS': True, + 'OPTIONS': { + 'context_processors': [ + 'django.template.context_processors.debug', + 'django.template.context_processors.request', + 'django.contrib.auth.context_processors.auth', + 'django.contrib.messages.context_processors.messages', + ], + }, + }, +] + +WSGI_APPLICATION = 'core.wsgi.application' + + +DATABASES = { + 'default': { + 'ENGINE': 'django.db.backends.sqlite3', + 'NAME': os.path.join(DATABASE_DIR, 'database.sqlite3'), + } +} + +AUTH_PASSWORD_VALIDATORS = [ + {'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator'}, + {'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator'}, + {'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator'}, + {'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'}, +] + + +LANGUAGE_CODE = 'en-us' +TIME_ZONE = 'UTC' +USE_I18N = True +USE_L10N = True +USE_TZ = True + + +STATIC_URL = '/static/' diff --git a/archivebox/archivebox/urls.py b/archivebox/core/urls.py similarity index 100% rename from archivebox/archivebox/urls.py rename to archivebox/core/urls.py diff --git a/archivebox/archivebox/wsgi.py b/archivebox/core/wsgi.py similarity index 100% rename from archivebox/archivebox/wsgi.py rename to archivebox/core/wsgi.py diff --git a/archivebox/env.py b/archivebox/env.py new file mode 100644 index 00000000..3a40fab5 --- /dev/null +++ b/archivebox/env.py @@ -0,0 +1,11 @@ +import os +import sys + + +PYTHON_DIR = os.path.dirname(os.path.abspath(__file__)) + +sys.path.append(PYTHON_DIR) +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "core.settings") + +import django +django.setup() diff --git a/archivebox/legacy/__init__.py b/archivebox/legacy/__init__.py new file mode 100644 index 00000000..ab53f570 --- /dev/null +++ b/archivebox/legacy/__init__.py @@ -0,0 +1,5 @@ + + +#__name__ = 'archivebox' +#__package__ = 'archivebox' + diff --git a/archivebox/core/archive.py b/archivebox/legacy/archive.py similarity index 85% rename from archivebox/core/archive.py rename to archivebox/legacy/archive.py index e74b2644..82788c47 100755 --- a/archivebox/core/archive.py +++ b/archivebox/legacy/archive.py @@ -8,7 +8,7 @@ but you can also run it directly using `python3 archive.py` Usage & Documentation: https://github.com/pirate/ArchiveBox/Wiki """ -__package__ = 'archivebox' +__package__ = 'legacy' import os import sys @@ -16,37 +16,50 @@ import shutil from typing import List, Optional -from core.schema import Link -from core.links import links_after_timestamp -from core.index import write_links_index, load_links_index -from core.archive_methods import archive_link -from core.config import ( +from .schema import Link +from .links import links_after_timestamp +from .index import write_links_index, load_links_index +from .archive_methods import archive_link +from .config import ( ONLY_NEW, - OUTPUT_DIR, VERSION, ANSI, - CURL_VERSION, - GIT_VERSION, - WGET_VERSION, - YOUTUBEDL_VERSION, - CHROME_VERSION, + + REPO_DIR, + PYTHON_DIR, + LEGACY_DIR, + TEMPLATES_DIR, + OUTPUT_DIR, + SOURCES_DIR, + ARCHIVE_DIR, + DATABASE_DIR, + USE_CURL, USE_WGET, USE_CHROME, + FETCH_GIT, + FETCH_MEDIA, + + DJANGO_BINARY, CURL_BINARY, GIT_BINARY, WGET_BINARY, YOUTUBEDL_BINARY, CHROME_BINARY, - FETCH_GIT, - FETCH_MEDIA, + + DJANGO_VERSION, + CURL_VERSION, + GIT_VERSION, + WGET_VERSION, + YOUTUBEDL_VERSION, + CHROME_VERSION, ) -from core.util import ( +from .util import ( enforce_types, handle_stdin_import, handle_file_import, ) -from core.logs import ( +from .logs import ( log_archiving_started, log_archiving_paused, log_archiving_finished, @@ -74,9 +87,26 @@ def print_help(): print(" archivebox add --depth=1 https://example.com/feed.rss") print(" archivebox update --resume=15109948213.123") + def print_version(): print('ArchiveBox v{}'.format(__VERSION__)) print() + print('[i] Folder locations:') + print(' REPO_DIR: ', REPO_DIR) + print(' PYTHON_DIR: ', PYTHON_DIR) + print(' LEGACY_DIR: ', LEGACY_DIR) + print(' TEMPLATES_DIR: ', TEMPLATES_DIR) + print() + print(' OUTPUT_DIR: ', OUTPUT_DIR) + print(' SOURCES_DIR: ', SOURCES_DIR) + print(' ARCHIVE_DIR: ', ARCHIVE_DIR) + print(' DATABASE_DIR: ', DATABASE_DIR) + print() + print( + '[√] Django:'.ljust(14), + 'python3 {} --version\n'.format(DJANGO_BINARY), + ' '*13, DJANGO_VERSION, '\n', + ) print( '[{}] CURL:'.format('√' if USE_CURL else 'X').ljust(14), '{} --version\n'.format(shutil.which(CURL_BINARY)), @@ -132,8 +162,11 @@ def main(args=None) -> None: if not os.path.exists(OUTPUT_DIR): print('{green}[+] Created a new archive directory: {}{reset}'.format(OUTPUT_DIR, **ANSI)) os.makedirs(OUTPUT_DIR) + os.makedirs(SOURCES_DIR) + os.makedirs(ARCHIVE_DIR) + os.makedirs(DATABASE_DIR) else: - not_empty = len(set(os.listdir(OUTPUT_DIR)) - {'.DS_Store'}) + not_empty = len(set(os.listdir(OUTPUT_DIR)) - {'.DS_Store', '.venv', 'venv', 'virtualenv', '.virtualenv'}) index_exists = os.path.exists(os.path.join(OUTPUT_DIR, 'index.json')) if not_empty and not index_exists: print( diff --git a/archivebox/core/archive_methods.py b/archivebox/legacy/archive_methods.py similarity index 99% rename from archivebox/core/archive_methods.py rename to archivebox/legacy/archive_methods.py index add5a069..d30d008d 100644 --- a/archivebox/core/archive_methods.py +++ b/archivebox/legacy/archive_methods.py @@ -4,13 +4,13 @@ from typing import Dict, List, Tuple, Optional from collections import defaultdict from datetime import datetime -from core.schema import Link, ArchiveResult, ArchiveOutput -from core.index import ( +from .schema import Link, ArchiveResult, ArchiveOutput +from .index import ( write_link_index, patch_links_index, load_json_link_index, ) -from core.config import ( +from .config import ( CURL_BINARY, GIT_BINARY, WGET_BINARY, @@ -40,7 +40,7 @@ from core.config import ( YOUTUBEDL_VERSION, WGET_AUTO_COMPRESSION, ) -from core.util import ( +from .util import ( enforce_types, domain, extension, @@ -54,7 +54,7 @@ from core.util import ( chrome_args, run, PIPE, DEVNULL, ) -from core.logs import ( +from .logs import ( log_link_archiving_started, log_link_archiving_finished, log_archive_method_started, diff --git a/archivebox/core/config.py b/archivebox/legacy/config.py similarity index 95% rename from archivebox/core/config.py rename to archivebox/legacy/config.py index f9f5ea57..413bed68 100644 --- a/archivebox/core/config.py +++ b/archivebox/legacy/config.py @@ -1,6 +1,7 @@ import os import re import sys +import django import shutil from typing import Optional @@ -58,7 +59,6 @@ YOUTUBEDL_BINARY = os.getenv('YOUTUBEDL_BINARY', 'youtube-dl') CHROME_BINARY = os.getenv('CHROME_BINARY', None) - # ****************************************************************************** ### Terminal Configuration @@ -79,7 +79,7 @@ if not USE_COLOR: ANSI = {k: '' for k in ANSI.keys()} -REPO_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..')) +REPO_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..')) if OUTPUT_DIR: OUTPUT_DIR = os.path.abspath(OUTPUT_DIR) else: @@ -87,11 +87,14 @@ else: ARCHIVE_DIR_NAME = 'archive' SOURCES_DIR_NAME = 'sources' +DATABASE_DIR_NAME = 'database' ARCHIVE_DIR = os.path.join(OUTPUT_DIR, ARCHIVE_DIR_NAME) SOURCES_DIR = os.path.join(OUTPUT_DIR, SOURCES_DIR_NAME) +DATABASE_DIR = os.path.join(OUTPUT_DIR, DATABASE_DIR_NAME) PYTHON_DIR = os.path.join(REPO_DIR, 'archivebox') -TEMPLATES_DIR = os.path.join(PYTHON_DIR, 'templates') +LEGACY_DIR = os.path.join(PYTHON_DIR, 'legacy') +TEMPLATES_DIR = os.path.join(LEGACY_DIR, 'templates') if COOKIES_FILE: COOKIES_FILE = os.path.abspath(COOKIES_FILE) @@ -100,8 +103,8 @@ URL_BLACKLIST_PTN = re.compile(URL_BLACKLIST, re.IGNORECASE) if URL_BLACKLIST el ########################### Environment & Dependencies ######################### -VERSION = open(os.path.join(PYTHON_DIR, 'VERSION'), 'r').read().strip() -GIT_SHA = VERSION.split('+')[1] +VERSION = open(os.path.join(REPO_DIR, 'VERSION'), 'r').read().strip() +GIT_SHA = VERSION.split('+')[-1] or 'unknown' ### Check Python environment python_vers = float('{}.{}'.format(sys.version_info.major, sys.version_info.minor)) @@ -196,6 +199,10 @@ def find_chrome_data_dir() -> Optional[str]: # ****************************************************************************** try: + ### Get Django version + DJANGO_BINARY = django.__file__.replace('__init__.py', 'bin/django-admin.py') + DJANGO_VERSION = '{}.{}.{} {} ({})'.format(*django.VERSION) + ### Make sure curl is installed if USE_CURL: USE_CURL = FETCH_FAVICON or SUBMIT_ARCHIVE_DOT_ORG diff --git a/archivebox/core/index.py b/archivebox/legacy/index.py similarity index 97% rename from archivebox/core/index.py rename to archivebox/legacy/index.py index 516e4304..98d9e3df 100644 --- a/archivebox/core/index.py +++ b/archivebox/legacy/index.py @@ -5,8 +5,8 @@ from datetime import datetime from string import Template from typing import List, Tuple, Iterator, Optional, Mapping -from core.schema import Link, ArchiveResult -from core.config import ( +from .schema import Link, ArchiveResult +from .config import ( OUTPUT_DIR, TEMPLATES_DIR, VERSION, @@ -14,7 +14,7 @@ from core.config import ( FOOTER_INFO, TIMEOUT, ) -from core.util import ( +from .util import ( ts_to_date, merge_links, urlencode, @@ -27,9 +27,9 @@ from core.util import ( copy_and_overwrite, atomic_write, ) -from core.parse import parse_links -from core.links import validate_links -from core.logs import ( +from .parse import parse_links +from .links import validate_links +from .logs import ( log_indexing_process_started, log_indexing_started, log_indexing_finished, diff --git a/archivebox/core/links.py b/archivebox/legacy/links.py similarity index 96% rename from archivebox/core/links.py rename to archivebox/legacy/links.py index fa4f53e6..914c3575 100644 --- a/archivebox/core/links.py +++ b/archivebox/legacy/links.py @@ -1,14 +1,14 @@ from typing import Iterable from collections import OrderedDict -from core.schema import Link -from core.util import ( +from .schema import Link +from .util import ( scheme, fuzzy_url, merge_links, ) -from core.config import URL_BLACKLIST_PTN +from .config import URL_BLACKLIST_PTN def validate_links(links: Iterable[Link]) -> Iterable[Link]: diff --git a/archivebox/core/logs.py b/archivebox/legacy/logs.py similarity index 98% rename from archivebox/core/logs.py rename to archivebox/legacy/logs.py index 0b9243c2..d9b92422 100644 --- a/archivebox/core/logs.py +++ b/archivebox/legacy/logs.py @@ -5,8 +5,8 @@ from datetime import datetime from dataclasses import dataclass from typing import Optional -from core.schema import Link, ArchiveResult -from core.config import ANSI, OUTPUT_DIR +from .schema import Link, ArchiveResult +from .config import ANSI, OUTPUT_DIR @dataclass diff --git a/archivebox/core/parse.py b/archivebox/legacy/parse.py similarity index 99% rename from archivebox/core/parse.py rename to archivebox/legacy/parse.py index 9a6936c0..49ffa7fd 100644 --- a/archivebox/core/parse.py +++ b/archivebox/legacy/parse.py @@ -24,8 +24,8 @@ from typing import Tuple, List, IO, Iterable from datetime import datetime import xml.etree.ElementTree as etree -from core.config import TIMEOUT -from core.util import ( +from .config import TIMEOUT +from .util import ( htmldecode, str_between, URL_REGEX, diff --git a/archivebox/core/purge.py b/archivebox/legacy/purge.py similarity index 93% rename from archivebox/core/purge.py rename to archivebox/legacy/purge.py index d9a5deda..ddc64b6b 100755 --- a/archivebox/core/purge.py +++ b/archivebox/legacy/purge.py @@ -6,8 +6,8 @@ from os.path import exists, join from shutil import rmtree from typing import List -from core.config import ARCHIVE_DIR, OUTPUT_DIR -from core.index import parse_json_links_index, write_html_links_index, write_json_links_index +from .config import ARCHIVE_DIR, OUTPUT_DIR +from .index import parse_json_links_index, write_html_links_index, write_json_links_index def cleanup_index(regexes: List[str], proceed: bool, delete: bool) -> None: diff --git a/archivebox/core/schema.py b/archivebox/legacy/schema.py similarity index 100% rename from archivebox/core/schema.py rename to archivebox/legacy/schema.py diff --git a/archivebox/templates/index.html b/archivebox/legacy/templates/index.html similarity index 100% rename from archivebox/templates/index.html rename to archivebox/legacy/templates/index.html diff --git a/archivebox/templates/index_row.html b/archivebox/legacy/templates/index_row.html similarity index 100% rename from archivebox/templates/index_row.html rename to archivebox/legacy/templates/index_row.html diff --git a/archivebox/templates/link_index.html b/archivebox/legacy/templates/link_index.html similarity index 100% rename from archivebox/templates/link_index.html rename to archivebox/legacy/templates/link_index.html diff --git a/archivebox/static/archive.png b/archivebox/legacy/templates/static/archive.png similarity index 100% rename from archivebox/static/archive.png rename to archivebox/legacy/templates/static/archive.png diff --git a/archivebox/static/bootstrap.min.css b/archivebox/legacy/templates/static/bootstrap.min.css similarity index 100% rename from archivebox/static/bootstrap.min.css rename to archivebox/legacy/templates/static/bootstrap.min.css diff --git a/archivebox/static/external.png b/archivebox/legacy/templates/static/external.png similarity index 100% rename from archivebox/static/external.png rename to archivebox/legacy/templates/static/external.png diff --git a/archivebox/static/jquery.dataTables.min.css b/archivebox/legacy/templates/static/jquery.dataTables.min.css similarity index 100% rename from archivebox/static/jquery.dataTables.min.css rename to archivebox/legacy/templates/static/jquery.dataTables.min.css diff --git a/archivebox/static/jquery.dataTables.min.js b/archivebox/legacy/templates/static/jquery.dataTables.min.js similarity index 100% rename from archivebox/static/jquery.dataTables.min.js rename to archivebox/legacy/templates/static/jquery.dataTables.min.js diff --git a/archivebox/static/jquery.min.js b/archivebox/legacy/templates/static/jquery.min.js similarity index 100% rename from archivebox/static/jquery.min.js rename to archivebox/legacy/templates/static/jquery.min.js diff --git a/archivebox/static/sort_asc.png b/archivebox/legacy/templates/static/sort_asc.png similarity index 100% rename from archivebox/static/sort_asc.png rename to archivebox/legacy/templates/static/sort_asc.png diff --git a/archivebox/static/sort_both.png b/archivebox/legacy/templates/static/sort_both.png similarity index 100% rename from archivebox/static/sort_both.png rename to archivebox/legacy/templates/static/sort_both.png diff --git a/archivebox/static/sort_desc.png b/archivebox/legacy/templates/static/sort_desc.png similarity index 100% rename from archivebox/static/sort_desc.png rename to archivebox/legacy/templates/static/sort_desc.png diff --git a/archivebox/static/spinner.gif b/archivebox/legacy/templates/static/spinner.gif similarity index 100% rename from archivebox/static/spinner.gif rename to archivebox/legacy/templates/static/spinner.gif diff --git a/archivebox/core/util.py b/archivebox/legacy/util.py similarity index 98% rename from archivebox/core/util.py rename to archivebox/legacy/util.py index cf314287..8121a988 100644 --- a/archivebox/core/util.py +++ b/archivebox/legacy/util.py @@ -26,8 +26,8 @@ from subprocess import ( from base32_crockford import encode as base32_encode # type: ignore -from core.schema import Link -from core.config import ( +from .schema import Link +from .config import ( ANSI, TERM_WIDTH, SOURCES_DIR, @@ -38,9 +38,8 @@ from core.config import ( CHECK_SSL_VALIDITY, WGET_USER_AGENT, CHROME_OPTIONS, - PYTHON_DIR, ) -from core.logs import pretty_path +from .logs import pretty_path ### Parsing Helpers @@ -332,14 +331,6 @@ def wget_output_path(link: Link) -> Optional[str]: return None -@enforce_types -def read_js_script(script_name: str) -> str: - script_path = os.path.join(PYTHON_DIR, 'scripts', script_name) - - with open(script_path, 'r') as f: - return f.read().split('// INFO BELOW HERE')[0].strip() - - ### String Manipulation & Logging Helpers @enforce_types diff --git a/archivebox/manage.py b/archivebox/manage.py index cc70dfd5..52c21895 100755 --- a/archivebox/manage.py +++ b/archivebox/manage.py @@ -3,7 +3,7 @@ import os import sys if __name__ == '__main__': - os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.settings') + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings') try: from django.core.management import execute_from_command_line except ImportError as exc: diff --git a/archivebox/tests/firefox_export.html b/archivebox/tests/firefox_export.html deleted file mode 100644 index 99d0bd0e..00000000 --- a/archivebox/tests/firefox_export.html +++ /dev/null @@ -1,34 +0,0 @@ - - - -Bookmarks -

Bookmarks Menu

- -

-

Recently Bookmarked -
Recent Tags -

Mozilla Firefox

-

-

Help and Tutorials -
Customize Firefox -
Get Involved -
About Us -

-

[Folder Name]

-

-

firefox export bookmarks at DuckDuckGo -
archive firefox bookmarks at DuckDuckGo -
nodiscc (nodiscc) · GitHub -
pirate/ArchiveBox · Github -
Phonotactic Reconstruction of Encrypted VoIP Conversations -
Firefox Bookmarks Archiver - gHacks Tech News -

-

Bookmarks Toolbar

-
Add bookmarks to this folder to see them displayed on the Bookmarks Toolbar -

-

Most Visited -
Getting Started -

-

diff --git a/archivebox/tests/pinboard_export.html b/archivebox/tests/pinboard_export.html deleted file mode 100644 index e12b5e41..00000000 --- a/archivebox/tests/pinboard_export.html +++ /dev/null @@ -1,12 +0,0 @@ - - -Pinboard Bookmarks -

Bookmarks

-
-

- -

Algo VPN scripts -
uLisp - -
-

diff --git a/archivebox/tests/pinboard_export.json b/archivebox/tests/pinboard_export.json deleted file mode 100644 index c39d08dd..00000000 --- a/archivebox/tests/pinboard_export.json +++ /dev/null @@ -1,8 +0,0 @@ -[{"href":"https:\/\/en.wikipedia.org\/wiki\/International_Typographic_Style","description":"International Typographic Style - Wikipedia, the free encyclopedia","extended":"","meta":"32f4cc916e6f5919cc19aceb10559cc1","hash":"3dd64e155e16731d20350bec6bef7cb5","time":"2016-06-07T11:27:08Z","shared":"no","toread":"yes","tags":""}, -{"href":"https:\/\/news.ycombinator.com\/item?id=11686984","description":"Announcing Certbot: EFF's Client for Let's Encrypt | Hacker News","extended":"","meta":"4a49602ba5d20ec3505c75d38ebc1d63","hash":"1c1acb53a5bd520e8529ce4f9600abee","time":"2016-05-13T05:46:16Z","shared":"no","toread":"yes","tags":""}, -{"href":"https:\/\/github.com\/google\/styleguide","description":"GitHub - google\/styleguide: Style guides for Google-originated open-source projects","extended":"","meta":"15a8d50f7295f18ccb6dd19cb689c68a","hash":"1028bf9872d8e4ea1b1858f4044abb58","time":"2016-02-24T08:49:25Z","shared":"no","toread":"no","tags":"code.style.guide programming reference web.dev"}, -{"href":"http:\/\/en.wikipedia.org\/wiki\/List_of_XML_and_HTML_character_entity_references","description":"List of XML and HTML character entity references - Wikipedia, the free encyclopedia","extended":"","meta":"6683a70f0f59c92c0bfd0bce653eab69","hash":"344d975c6251a8d460971fa2c43d9bbb","time":"2014-06-16T04:17:15Z","shared":"no","toread":"no","tags":"html reference web.dev typography"}, -{"href":"https:\/\/pushover.net\/","description":"Pushover: Simple Notifications for Android, iOS, and Desktop","extended":"","meta":"1e68511234d9390d10b7772c8ccc4b9e","hash":"bb93374ead8a937b18c7c46e13168a7d","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"app android"}, -{"href":"http:\/\/www.reddit.com\/r\/Android","description":"r\/android","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"reddit android 1"}, -{"href":"http:\/\/www.reddit.com\/r\/Android2","description":"r\/android","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e2","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"reddit android 2"}, -{"href":"http:\/\/www.reddit.com\/r\/Android3","description":"r\/android","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e4","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"reddit android 3"}] diff --git a/archivebox/tests/pinboard_export.rss b/archivebox/tests/pinboard_export.rss deleted file mode 100644 index a300720a..00000000 --- a/archivebox/tests/pinboard_export.rss +++ /dev/null @@ -1,46 +0,0 @@ - - - - Pinboard (private aaronmueller) - https://pinboard.in/u:aaronmueller/private/ - - - - - - - - - - - Mehkee - Mechanical Keyboard Parts & Accessories - 2018-11-08T21:29:32+00:00 - https://mehkee.com/ - aaronmueller - keyboard gadget diy - http://pinboard.in/ - http://pinboard.in/u:aaronmueller/b:xxx/ - - - - - - - - - - QMK Firmware - An open source firmware for AVR and ARM based keyboards - 2018-11-06T22:36:21+00:00 - https://qmk.fm/ - aaronmueller - firmware keyboard - http://pinboard.in/ - http://pinboard.in/u:aaronmueller/b:xxx/ - - - - - - - - diff --git a/archivebox/tests/pinboard_export.xml b/archivebox/tests/pinboard_export.xml deleted file mode 100644 index 9dce0f54..00000000 --- a/archivebox/tests/pinboard_export.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/archivebox/tests/pinboard_export_2.json b/archivebox/tests/pinboard_export_2.json deleted file mode 100644 index b106039c..00000000 --- a/archivebox/tests/pinboard_export_2.json +++ /dev/null @@ -1,2 +0,0 @@ -[{"href":"https:\/\/github.com\/trailofbits\/algo","description":"Algo VPN scripts","extended":"","meta":"62325ba3b577683aee854d7f191034dc","hash":"18d708f67bb26d843b1cac4530bb52aa","time":"2018-11-19T08:38:53Z","shared":"no","toread":"yes","tags":"vpn scripts"}, -{"href":"http:\/\/www.ulisp.com\/","description":"uLisp","extended":"","meta":"7bd0c0ef31f69d1459e3d37366e742b3","hash":"2a17ae95925a03a5b9bb38cf7f6c6f9b","time":"2018-11-16T13:20:12Z","shared":"no","toread":"yes","tags":"arduino avr embedded lisp"}] diff --git a/archivebox/tests/pocket_export.html b/archivebox/tests/pocket_export.html deleted file mode 100644 index bb51c0c6..00000000 --- a/archivebox/tests/pocket_export.html +++ /dev/null @@ -1,38 +0,0 @@ - - - - - - Pocket Export - - -

Unread

- - -

Read Archive

- - - diff --git a/archivebox/tests/rss_export.xml b/archivebox/tests/rss_export.xml deleted file mode 100644 index 69eb9bc2..00000000 --- a/archivebox/tests/rss_export.xml +++ /dev/null @@ -1,228 +0,0 @@ - - - - -My Reading List: Read and Unread -Items I've saved to read -http://readitlaterlist.com/users/nikisweeting/feed/all - - - - -<![CDATA[Cell signaling]]> -Unread -https://en.wikipedia.org/wiki/Cell_signaling -https://en.wikipedia.org/wiki/Cell_signaling -Mon, 30 Oct 2017 01:12:10 -0500 - - -<![CDATA[Hayflick limit]]> -Unread -https://en.wikipedia.org/wiki/Hayflick_limit -https://en.wikipedia.org/wiki/Hayflick_limit -Mon, 30 Oct 2017 01:11:38 -0500 - - -<![CDATA[Even moderate drinking by parents can upset children – study]]> -Unread -https://theguardian.com/society/2017/oct/18/even-moderate-drinking-by-parents-can-upset-children-study?CMP=Share_AndroidApp_Signal -https://theguardian.com/society/2017/oct/18/even-moderate-drinking-by-parents-can-upset-children-study?CMP=Share_AndroidApp_Signal -Mon, 30 Oct 2017 01:11:30 -0500 - - -<![CDATA[How Merkle trees enable the decentralized Web]]> -Unread -https://taravancil.com/blog/how-merkle-trees-enable-decentralized-web -https://taravancil.com/blog/how-merkle-trees-enable-decentralized-web -Mon, 30 Oct 2017 01:11:30 -0500 - - -<![CDATA[Inertial navigation system]]> -Unread -https://en.wikipedia.org/wiki/Inertial_navigation_system -https://en.wikipedia.org/wiki/Inertial_navigation_system -Mon, 30 Oct 2017 01:10:10 -0500 - - -<![CDATA[Dead reckoning]]> -Unread -https://en.wikipedia.org/wiki/Dead_reckoning -https://en.wikipedia.org/wiki/Dead_reckoning -Mon, 30 Oct 2017 01:10:08 -0500 - - -<![CDATA[Calling Rust From Python]]> -Unread -https://bheisler.github.io/post/calling-rust-in-python -https://bheisler.github.io/post/calling-rust-in-python -Mon, 30 Oct 2017 01:04:33 -0500 - - -<![CDATA[Why would anyone choose Docker over fat binaries?]]> -Unread -http://smashcompany.com/technology/why-would-anyone-choose-docker-over-fat-binaries -http://smashcompany.com/technology/why-would-anyone-choose-docker-over-fat-binaries -Sun, 29 Oct 2017 14:57:25 -0500 - - -<![CDATA[]]> -Unread -https://heml.io -https://heml.io -Sun, 29 Oct 2017 14:55:26 -0500 - - -<![CDATA[A surprising amount of people want to be in North Korea]]> -Unread -https://blog.benjojo.co.uk/post/north-korea-dprk-bgp-geoip-fruad -https://blog.benjojo.co.uk/post/north-korea-dprk-bgp-geoip-fruad -Sat, 28 Oct 2017 05:41:41 -0500 - - -<![CDATA[Learning a Hierarchy]]> -Unread -https://blog.openai.com/learning-a-hierarchy -https://blog.openai.com/learning-a-hierarchy -Thu, 26 Oct 2017 16:43:48 -0500 - - -<![CDATA[High Performance Browser Networking]]> -Unread -https://hpbn.co -https://hpbn.co -Wed, 25 Oct 2017 19:05:24 -0500 - - -<![CDATA[What tender and juicy drama is going on at your school/workplace?]]> -Unread -https://reddit.com/r/AskReddit/comments/78nc2a/what_tender_and_juicy_drama_is_going_on_at_your/dovab2v -https://reddit.com/r/AskReddit/comments/78nc2a/what_tender_and_juicy_drama_is_going_on_at_your/dovab2v -Wed, 25 Oct 2017 18:05:58 -0500 - - -<![CDATA[Using an SSH Bastion Host]]> -Unread -https://blog.scottlowe.org/2015/11/21/using-ssh-bastion-host -https://blog.scottlowe.org/2015/11/21/using-ssh-bastion-host -Wed, 25 Oct 2017 11:38:47 -0500 - - -<![CDATA[Let's Define "undefined" | NathanShane.me]]> -Unread -https://nathanshane.me/blog/let's-define-undefined -https://nathanshane.me/blog/let's-define-undefined -Wed, 25 Oct 2017 11:32:59 -0500 - - -<![CDATA[Control theory]]> -Unread -https://en.wikipedia.org/wiki/Control_theory#Closed-loop_transfer_function -https://en.wikipedia.org/wiki/Control_theory#Closed-loop_transfer_function -Tue, 24 Oct 2017 22:57:43 -0500 - - -<![CDATA[J012-86-intractable.pdf]]> -Unread -http://mit.edu/~jnt/Papers/J012-86-intractable.pdf -http://mit.edu/~jnt/Papers/J012-86-intractable.pdf -Tue, 24 Oct 2017 22:56:32 -0500 - - -<![CDATA[Dynamic Programming: First Principles]]> -Unread -http://flawlessrhetoric.com/Dynamic-Programming-First-Principles -http://flawlessrhetoric.com/Dynamic-Programming-First-Principles -Tue, 24 Oct 2017 22:56:30 -0500 - - -<![CDATA[What Would Happen If There Were No Number 6?]]> -Unread -https://fivethirtyeight.com/features/what-would-happen-if-there-were-no-number-6 -https://fivethirtyeight.com/features/what-would-happen-if-there-were-no-number-6 -Tue, 24 Oct 2017 22:21:59 -0500 - - -<![CDATA[Ten Basic Rules for Adventure]]> -Unread -https://outsideonline.com/2252916/10-basic-rules-adventure -https://outsideonline.com/2252916/10-basic-rules-adventure -Tue, 24 Oct 2017 20:56:25 -0500 - - -<![CDATA[Insects Are In Serious Trouble]]> -Unread -https://theatlantic.com/science/archive/2017/10/oh-no/543390?single_page=true -https://theatlantic.com/science/archive/2017/10/oh-no/543390?single_page=true -Mon, 23 Oct 2017 23:10:10 -0500 - - -<![CDATA[Netflix/bless]]> -Unread -https://github.com/Netflix/bless -https://github.com/Netflix/bless -Mon, 23 Oct 2017 23:04:46 -0500 - - -<![CDATA[Getting Your First 10 Customers]]> -Unread -https://stripe.com/atlas/guides/starting-sales -https://stripe.com/atlas/guides/starting-sales -Mon, 23 Oct 2017 22:27:36 -0500 - - -<![CDATA[GPS Hardware]]> -Unread -https://novasummits.com/gps-hardware -https://novasummits.com/gps-hardware -Mon, 23 Oct 2017 04:44:40 -0500 - - -<![CDATA[Bicycle Tires and Tubes]]> -Unread -http://sheldonbrown.com/tires.html#pressure -http://sheldonbrown.com/tires.html#pressure -Mon, 23 Oct 2017 01:28:32 -0500 - - -<![CDATA[Tire light is on]]> -Unread -https://reddit.com/r/Justrolledintotheshop/comments/77zm9e/tire_light_is_on/doqbshe -https://reddit.com/r/Justrolledintotheshop/comments/77zm9e/tire_light_is_on/doqbshe -Mon, 23 Oct 2017 01:21:42 -0500 - - -<![CDATA[Bad_Salish_Boo ?? on Twitter]]> -Unread -https://t.co/PDLlNjACv9 -https://t.co/PDLlNjACv9 -Sat, 21 Oct 2017 06:48:07 -0500 - - -<![CDATA[Is an Open Marriage a Happier Marriage?]]> -Unread -https://nytimes.com/2017/05/11/magazine/is-an-open-marriage-a-happier-marriage.html -https://nytimes.com/2017/05/11/magazine/is-an-open-marriage-a-happier-marriage.html -Fri, 20 Oct 2017 13:08:52 -0500 - - -<![CDATA[The Invention of Monogamy]]> -Unread -https://thenib.com/the-invention-of-monogamy -https://thenib.com/the-invention-of-monogamy -Fri, 20 Oct 2017 12:19:00 -0500 - - -<![CDATA[Google Chrome May Add a Permission to Stop In-Browser Cryptocurrency Miners]]> -Unread -https://bleepingcomputer.com/news/google/google-chrome-may-add-a-permission-to-stop-in-browser-cryptocurrency-miners -https://bleepingcomputer.com/news/google/google-chrome-may-add-a-permission-to-stop-in-browser-cryptocurrency-miners -Fri, 20 Oct 2017 03:57:41 -0500 - - - - diff --git a/archivebox/tests/tests.py b/archivebox/tests/tests.py deleted file mode 100755 index 33fd9ba4..00000000 --- a/archivebox/tests/tests.py +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env python3 -import json -import os -from os.path import dirname, pardir, join -from subprocess import check_output, check_call -from tempfile import TemporaryDirectory -from typing import List - -import pytest - - -ARCHIVER_BIN = join(dirname(__file__), pardir, 'archive.py') - - -class Helper: - def __init__(self, output_dir: str): - self.output_dir = output_dir - - def run(self, links, env=None, env_defaults=None): - if env_defaults is None: - env_defaults = { - # we don't wanna spam archive.org witin our tests.. - 'SUBMIT_ARCHIVE_DOT_ORG': 'False', - } - if env is None: - env = {} - - env = dict(**env_defaults, **env) - - jj = [] - for url in links: - jj.append({ - 'href': url, - 'description': url, - }) - input_json = join(self.output_dir, 'input.json') - with open(input_json, 'w') as fo: - json.dump(jj, fo) - - if env is None: - env = {} - env['OUTPUT_DIR'] = self.output_dir - check_call( - [ARCHIVER_BIN, input_json], - env={**os.environ.copy(), **env}, - ) - - -class TestArchiver: - def setup(self): - # self.tdir = TemporaryDirectory(dir='hello') - class AAA: - name = 'hello' - self.tdir = AAA() - - def teardown(self): - pass - # self.tdir.cleanup() - - @property - def output_dir(self): - return self.tdir.name - - def test_fetch_favicon_false(self): - h = Helper(self.output_dir) - - h.run(links=[ - 'https://google.com', - ], env={ - 'FETCH_FAVICON': 'False', - }) - # for now no asserts, good enough if it isn't failing - - def test_3000_links(self): - """ - The pages are deliberatly unreachable. The tool should gracefully process all of them even though individual links are failing. - """ - h = Helper(self.output_dir) - - h.run(links=[ - f'https://localhost:123/whatever_{i}.html' for i in range(3000) - ], env={ - 'FETCH_FAVICON': 'False', - 'FETCH_SCREENSHOT': 'False', - 'FETCH_PDF': 'False', - 'FETCH_DOM': 'False', - 'CHECK_SSL_VALIDITY': 'False', - }) - - -if __name__ == '__main__': - pytest.main([__file__]) diff --git a/bin/README.md b/bin/README.md deleted file mode 100644 index 88459dda..00000000 --- a/bin/README.md +++ /dev/null @@ -1,18 +0,0 @@ -# Binaries for running ArchiveBox - -This folder contains all the executables that ArchiveBox provides. - - -# Adding it to your `$PATH` -To be able to run ArchiveBox from anywhere on your system, you can add this entire folder to your path, like so: - -**Edit `~/.bash_profile`:** -```bash -export PATH=/opt/ArchiveBox/bin:$PATH -``` - -# Running executables directly - -If you don't want to add ArchiveBox to your `$PATH` you can also call these executables directly with their full path, like so: - -`/opt/ArchiveBox/bin/ArchiveBox https://example.com/some/feed.rss` diff --git a/bin/archivebox b/bin/archivebox deleted file mode 120000 index 053f14ab..00000000 --- a/bin/archivebox +++ /dev/null @@ -1 +0,0 @@ -../archivebox/archive.py \ No newline at end of file diff --git a/bin/archivebox b/bin/archivebox new file mode 100755 index 00000000..601d4c25 --- /dev/null +++ b/bin/archivebox @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 + +import os +import sys + + +BIN_DIR = os.path.dirname(os.path.abspath(__file__)) +REPO_DIR = os.path.abspath(os.path.join(BIN_DIR, os.pardir)) +sys.path.append(REPO_DIR) + +from archivebox.__main__ import main + + +if __name__ == '__main__': + main(sys.argv) diff --git a/bin/archivebox-purge b/bin/archivebox-purge deleted file mode 120000 index 1bb208e1..00000000 --- a/bin/archivebox-purge +++ /dev/null @@ -1 +0,0 @@ -../archivebox/purge.py \ No newline at end of file diff --git a/setup.py b/setup.py index d3ce3963..d853492b 100644 --- a/setup.py +++ b/setup.py @@ -37,10 +37,11 @@ setuptools.setup( python_requires='>=3.6', install_requires=[ "base32-crockford==0.3.0", + "django==2.2", ], entry_points={ 'console_scripts': [ - 'archivebox = archivebox.archive:main', + 'archivebox = archivebox.__main__:main', ], }, package_data={