Merge e59406541d
into e5aba0dc2e
This commit is contained in:
commit
0c0ea7e4f5
4
.gitignore
vendored
4
.gitignore
vendored
|
@ -26,11 +26,9 @@ dist/
|
||||||
|
|
||||||
# Data folders
|
# Data folders
|
||||||
data/
|
data/
|
||||||
data1/
|
|
||||||
data2/
|
|
||||||
data3/
|
|
||||||
data*/
|
data*/
|
||||||
output/
|
output/
|
||||||
|
|
||||||
# vim
|
# vim
|
||||||
*.sw?
|
*.sw?
|
||||||
|
.vscode/
|
||||||
|
|
|
@ -15,8 +15,8 @@
|
||||||
# Read more about [developing Archivebox](https://github.com/ArchiveBox/ArchiveBox#archivebox-development).
|
# Read more about [developing Archivebox](https://github.com/ArchiveBox/ArchiveBox#archivebox-development).
|
||||||
|
|
||||||
|
|
||||||
# Use Debian 12 w/ faster package updates: https://packages.debian.org/bookworm-backports/
|
|
||||||
FROM python:3.11-slim-bookworm
|
FROM python:3.11-slim-bookworm
|
||||||
|
# Uses Debian 12 w/ faster-updating apt-lists added below: https://packages.debian.org/bookworm-backports/
|
||||||
|
|
||||||
LABEL name="archivebox" \
|
LABEL name="archivebox" \
|
||||||
maintainer="Nick Sweeting <dockerfile@archivebox.io>" \
|
maintainer="Nick Sweeting <dockerfile@archivebox.io>" \
|
||||||
|
@ -127,9 +127,9 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
|
||||||
# 1. packaging dependencies
|
# 1. packaging dependencies
|
||||||
apt-transport-https ca-certificates apt-utils gnupg2 curl wget \
|
apt-transport-https ca-certificates apt-utils gnupg2 curl wget \
|
||||||
# 2. docker and init system dependencies
|
# 2. docker and init system dependencies
|
||||||
zlib1g-dev dumb-init gosu cron unzip grep \
|
zlib1g-dev dumb-init gosu cron unzip grep ncat \
|
||||||
# 3. frivolous CLI helpers to make debugging failed archiving easier
|
# 3. frivolous CLI helpers to make debugging failed archiving easier
|
||||||
# nano iputils-ping dnsutils htop procps jq yq
|
# nano iputils-ping dnsutils htop procps jq yq \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
######### Language Environments ####################################
|
######### Language Environments ####################################
|
||||||
|
|
|
@ -38,7 +38,7 @@ from hashlib import md5
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Optional, Type, Tuple, Dict, Union, List
|
from typing import Optional, Type, Tuple, Dict, Union, List
|
||||||
from subprocess import run, PIPE, DEVNULL
|
from subprocess import run, PIPE, STDOUT, DEVNULL
|
||||||
from configparser import ConfigParser
|
from configparser import ConfigParser
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
import importlib.metadata
|
import importlib.metadata
|
||||||
|
@ -854,7 +854,7 @@ def hint(text: Union[Tuple[str, ...], List[str], str], prefix=' ', config: Op
|
||||||
|
|
||||||
|
|
||||||
# Dependency Metadata Helpers
|
# Dependency Metadata Helpers
|
||||||
def bin_version(binary: Optional[str]) -> Optional[str]:
|
def bin_version(binary: Optional[str], cmd=None) -> Optional[str]:
|
||||||
"""check the presence and return valid version line of a specified binary"""
|
"""check the presence and return valid version line of a specified binary"""
|
||||||
|
|
||||||
abspath = bin_path(binary)
|
abspath = bin_path(binary)
|
||||||
|
@ -863,11 +863,21 @@ def bin_version(binary: Optional[str]) -> Optional[str]:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
bin_env = os.environ | {'LANG': 'C'}
|
bin_env = os.environ | {'LANG': 'C'}
|
||||||
version_str = run([abspath, "--version"], stdout=PIPE, env=bin_env).stdout.strip().decode()
|
is_cmd_str = cmd and isinstance(cmd, str)
|
||||||
|
version_str = run(cmd or [abspath, "--version"], shell=is_cmd_str, stdout=PIPE, stderr=STDOUT, env=bin_env).stdout.strip().decode()
|
||||||
if not version_str:
|
if not version_str:
|
||||||
version_str = run([abspath, "--version"], stdout=PIPE).stdout.strip().decode()
|
version_str = run(cmd or [abspath, "--version"], shell=is_cmd_str, stdout=PIPE, stderr=STDOUT).stdout.strip().decode()
|
||||||
# take first 3 columns of first line of version info
|
|
||||||
return ' '.join(version_str.split('\n')[0].strip().split()[:3])
|
version_ptn = re.compile(r"\d+?\.\d+?\.?\d*", re.MULTILINE)
|
||||||
|
try:
|
||||||
|
version_nums = version_ptn.findall(version_str.split('\n')[0])[0]
|
||||||
|
if version_nums:
|
||||||
|
return version_nums
|
||||||
|
else:
|
||||||
|
raise IndexError
|
||||||
|
except IndexError:
|
||||||
|
# take first 3 columns of first line of version info
|
||||||
|
return ' '.join(version_str.split('\n')[0].strip().split()[:3])
|
||||||
except OSError:
|
except OSError:
|
||||||
pass
|
pass
|
||||||
# stderr(f'[X] Unable to find working version of dependency: {binary}', color='red')
|
# stderr(f'[X] Unable to find working version of dependency: {binary}', color='red')
|
||||||
|
|
|
@ -9,6 +9,8 @@ SimpleConfigValueDict = Dict[str, SimpleConfigValue]
|
||||||
SimpleConfigValueGetter = Callable[[], SimpleConfigValue]
|
SimpleConfigValueGetter = Callable[[], SimpleConfigValue]
|
||||||
ConfigValue = Union[SimpleConfigValue, SimpleConfigValueDict, SimpleConfigValueGetter]
|
ConfigValue = Union[SimpleConfigValue, SimpleConfigValueDict, SimpleConfigValueGetter]
|
||||||
|
|
||||||
|
SHArgs = List[str] # shell command args list e.g. ["--something=1", "--someotherarg"]
|
||||||
|
|
||||||
|
|
||||||
class BaseConfig(TypedDict):
|
class BaseConfig(TypedDict):
|
||||||
pass
|
pass
|
||||||
|
@ -16,10 +18,10 @@ class BaseConfig(TypedDict):
|
||||||
class ConfigDict(BaseConfig, total=False):
|
class ConfigDict(BaseConfig, total=False):
|
||||||
"""
|
"""
|
||||||
# Regenerate by pasting this quine into `archivebox shell` 🥚
|
# Regenerate by pasting this quine into `archivebox shell` 🥚
|
||||||
from archivebox.config import ConfigDict, CONFIG_DEFAULTS
|
from archivebox.config import ConfigDict, CONFIG_SCHEMA
|
||||||
print('class ConfigDict(BaseConfig, total=False):')
|
print('class ConfigDict(BaseConfig, total=False):')
|
||||||
print(' ' + '"'*3 + ConfigDict.__doc__ + '"'*3)
|
print(' ' + '"'*3 + ConfigDict.__doc__ + '"'*3)
|
||||||
for section, configs in CONFIG_DEFAULTS.items():
|
for section, configs in CONFIG_SCHEMA.items():
|
||||||
for key, attrs in configs.items():
|
for key, attrs in configs.items():
|
||||||
Type, default = attrs['type'], attrs['default']
|
Type, default = attrs['type'], attrs['default']
|
||||||
if default is None:
|
if default is None:
|
||||||
|
@ -32,16 +34,23 @@ class ConfigDict(BaseConfig, total=False):
|
||||||
USE_COLOR: bool
|
USE_COLOR: bool
|
||||||
SHOW_PROGRESS: bool
|
SHOW_PROGRESS: bool
|
||||||
IN_DOCKER: bool
|
IN_DOCKER: bool
|
||||||
|
IN_QEMU: bool
|
||||||
|
PUID: int
|
||||||
|
PGID: int
|
||||||
|
|
||||||
PACKAGE_DIR: Path
|
OUTPUT_DIR: Optional[str]
|
||||||
OUTPUT_DIR: Path
|
CONFIG_FILE: Optional[str]
|
||||||
CONFIG_FILE: Path
|
|
||||||
ONLY_NEW: bool
|
ONLY_NEW: bool
|
||||||
TIMEOUT: int
|
TIMEOUT: int
|
||||||
MEDIA_TIMEOUT: int
|
MEDIA_TIMEOUT: int
|
||||||
OUTPUT_PERMISSIONS: str
|
OUTPUT_PERMISSIONS: str
|
||||||
RESTRICT_FILE_NAMES: str
|
RESTRICT_FILE_NAMES: str
|
||||||
URL_DENYLIST: str
|
URL_DENYLIST: str
|
||||||
|
URL_ALLOWLIST: Optional[str]
|
||||||
|
ADMIN_USERNAME: Optional[str]
|
||||||
|
ADMIN_PASSWORD: Optional[str]
|
||||||
|
ENFORCE_ATOMIC_WRITES: bool
|
||||||
|
TAG_SEPARATOR_PATTERN: str
|
||||||
|
|
||||||
SECRET_KEY: Optional[str]
|
SECRET_KEY: Optional[str]
|
||||||
BIND_ADDR: str
|
BIND_ADDR: str
|
||||||
|
@ -49,7 +58,27 @@ class ConfigDict(BaseConfig, total=False):
|
||||||
DEBUG: bool
|
DEBUG: bool
|
||||||
PUBLIC_INDEX: bool
|
PUBLIC_INDEX: bool
|
||||||
PUBLIC_SNAPSHOTS: bool
|
PUBLIC_SNAPSHOTS: bool
|
||||||
|
PUBLIC_ADD_VIEW: bool
|
||||||
FOOTER_INFO: str
|
FOOTER_INFO: str
|
||||||
|
SNAPSHOTS_PER_PAGE: int
|
||||||
|
CUSTOM_TEMPLATES_DIR: Optional[str]
|
||||||
|
TIME_ZONE: str
|
||||||
|
TIMEZONE: str
|
||||||
|
REVERSE_PROXY_USER_HEADER: str
|
||||||
|
REVERSE_PROXY_WHITELIST: str
|
||||||
|
LOGOUT_REDIRECT_URL: str
|
||||||
|
PREVIEW_ORIGINALS: bool
|
||||||
|
LDAP: bool
|
||||||
|
LDAP_SERVER_URI: Optional[str]
|
||||||
|
LDAP_BIND_DN: Optional[str]
|
||||||
|
LDAP_BIND_PASSWORD: Optional[str]
|
||||||
|
LDAP_USER_BASE: Optional[str]
|
||||||
|
LDAP_USER_FILTER: Optional[str]
|
||||||
|
LDAP_USERNAME_ATTR: Optional[str]
|
||||||
|
LDAP_FIRSTNAME_ATTR: Optional[str]
|
||||||
|
LDAP_LASTNAME_ATTR: Optional[str]
|
||||||
|
LDAP_EMAIL_ATTR: Optional[str]
|
||||||
|
LDAP_CREATE_SUPERUSER: bool
|
||||||
|
|
||||||
SAVE_TITLE: bool
|
SAVE_TITLE: bool
|
||||||
SAVE_FAVICON: bool
|
SAVE_FAVICON: bool
|
||||||
|
@ -58,25 +87,50 @@ class ConfigDict(BaseConfig, total=False):
|
||||||
SAVE_SINGLEFILE: bool
|
SAVE_SINGLEFILE: bool
|
||||||
SAVE_READABILITY: bool
|
SAVE_READABILITY: bool
|
||||||
SAVE_MERCURY: bool
|
SAVE_MERCURY: bool
|
||||||
|
SAVE_HTMLTOTEXT: bool
|
||||||
SAVE_PDF: bool
|
SAVE_PDF: bool
|
||||||
SAVE_SCREENSHOT: bool
|
SAVE_SCREENSHOT: bool
|
||||||
SAVE_DOM: bool
|
SAVE_DOM: bool
|
||||||
|
SAVE_HEADERS: bool
|
||||||
SAVE_WARC: bool
|
SAVE_WARC: bool
|
||||||
SAVE_GIT: bool
|
SAVE_GIT: bool
|
||||||
SAVE_MEDIA: bool
|
SAVE_MEDIA: bool
|
||||||
SAVE_ARCHIVE_DOT_ORG: bool
|
SAVE_ARCHIVE_DOT_ORG: bool
|
||||||
|
SAVE_ALLOWLIST: dict
|
||||||
|
SAVE_DENYLIST: dict
|
||||||
|
|
||||||
RESOLUTION: str
|
RESOLUTION: str
|
||||||
GIT_DOMAINS: str
|
GIT_DOMAINS: str
|
||||||
CHECK_SSL_VALIDITY: bool
|
CHECK_SSL_VALIDITY: bool
|
||||||
|
MEDIA_MAX_SIZE: str
|
||||||
CURL_USER_AGENT: str
|
CURL_USER_AGENT: str
|
||||||
WGET_USER_AGENT: str
|
WGET_USER_AGENT: str
|
||||||
CHROME_USER_AGENT: str
|
CHROME_USER_AGENT: str
|
||||||
COOKIES_FILE: Union[str, Path, None]
|
COOKIES_FILE: Optional[str]
|
||||||
CHROME_USER_DATA_DIR: Union[str, Path, None]
|
CHROME_USER_DATA_DIR: Optional[str]
|
||||||
CHROME_TIMEOUT: int
|
CHROME_TIMEOUT: int
|
||||||
CHROME_HEADLESS: bool
|
CHROME_HEADLESS: bool
|
||||||
CHROME_SANDBOX: bool
|
CHROME_SANDBOX: bool
|
||||||
|
YOUTUBEDL_ARGS: list
|
||||||
|
WGET_ARGS: list
|
||||||
|
CURL_ARGS: list
|
||||||
|
GIT_ARGS: list
|
||||||
|
SINGLEFILE_ARGS: Optional[list]
|
||||||
|
FAVICON_PROVIDER: str
|
||||||
|
|
||||||
|
USE_INDEXING_BACKEND: bool
|
||||||
|
USE_SEARCHING_BACKEND: bool
|
||||||
|
SEARCH_BACKEND_ENGINE: str
|
||||||
|
SEARCH_BACKEND_HOST_NAME: str
|
||||||
|
SEARCH_BACKEND_PORT: int
|
||||||
|
SEARCH_BACKEND_PASSWORD: str
|
||||||
|
SEARCH_PROCESS_HTML: bool
|
||||||
|
SONIC_COLLECTION: str
|
||||||
|
SONIC_BUCKET: str
|
||||||
|
SEARCH_BACKEND_TIMEOUT: int
|
||||||
|
FTS_SEPARATE_DATABASE: bool
|
||||||
|
FTS_TOKENIZERS: str
|
||||||
|
FTS_SQLITE_MAX_LENGTH: int
|
||||||
|
|
||||||
USE_CURL: bool
|
USE_CURL: bool
|
||||||
USE_WGET: bool
|
USE_WGET: bool
|
||||||
|
@ -85,7 +139,9 @@ class ConfigDict(BaseConfig, total=False):
|
||||||
USE_MERCURY: bool
|
USE_MERCURY: bool
|
||||||
USE_GIT: bool
|
USE_GIT: bool
|
||||||
USE_CHROME: bool
|
USE_CHROME: bool
|
||||||
|
USE_NODE: bool
|
||||||
USE_YOUTUBEDL: bool
|
USE_YOUTUBEDL: bool
|
||||||
|
USE_RIPGREP: bool
|
||||||
CURL_BINARY: str
|
CURL_BINARY: str
|
||||||
GIT_BINARY: str
|
GIT_BINARY: str
|
||||||
WGET_BINARY: str
|
WGET_BINARY: str
|
||||||
|
@ -93,13 +149,12 @@ class ConfigDict(BaseConfig, total=False):
|
||||||
READABILITY_BINARY: str
|
READABILITY_BINARY: str
|
||||||
MERCURY_BINARY: str
|
MERCURY_BINARY: str
|
||||||
YOUTUBEDL_BINARY: str
|
YOUTUBEDL_BINARY: str
|
||||||
|
NODE_BINARY: str
|
||||||
|
RIPGREP_BINARY: str
|
||||||
CHROME_BINARY: Optional[str]
|
CHROME_BINARY: Optional[str]
|
||||||
|
POCKET_CONSUMER_KEY: Optional[str]
|
||||||
YOUTUBEDL_ARGS: List[str]
|
POCKET_ACCESS_TOKENS: dict
|
||||||
WGET_ARGS: List[str]
|
READWISE_READER_TOKENS: dict
|
||||||
CURL_ARGS: List[str]
|
|
||||||
GIT_ARGS: List[str]
|
|
||||||
TAG_SEPARATOR_PATTERN: str
|
|
||||||
|
|
||||||
|
|
||||||
ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]
|
ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]
|
||||||
|
|
|
@ -1,2 +1,3 @@
|
||||||
__package__ = 'archivebox.core'
|
__package__ = 'archivebox.core'
|
||||||
|
|
||||||
|
# default_app_config = 'core.apps.CoreAppConfig'
|
||||||
|
|
|
@ -12,6 +12,7 @@ from django.utils.html import format_html
|
||||||
from django.utils.safestring import mark_safe
|
from django.utils.safestring import mark_safe
|
||||||
from django.shortcuts import render, redirect
|
from django.shortcuts import render, redirect
|
||||||
from django.contrib.auth import get_user_model
|
from django.contrib.auth import get_user_model
|
||||||
|
from django.contrib.auth.models import Group, Permission
|
||||||
from django import forms
|
from django import forms
|
||||||
|
|
||||||
from ..util import htmldecode, urldecode, ansi_to_html
|
from ..util import htmldecode, urldecode, ansi_to_html
|
||||||
|
@ -159,6 +160,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
|
||||||
|
|
||||||
action_form = SnapshotActionForm
|
action_form = SnapshotActionForm
|
||||||
|
|
||||||
|
|
||||||
def changelist_view(self, request, extra_context=None):
|
def changelist_view(self, request, extra_context=None):
|
||||||
extra_context = extra_context or {}
|
extra_context = extra_context or {}
|
||||||
return super().changelist_view(request, extra_context | GLOBAL_CONTEXT)
|
return super().changelist_view(request, extra_context | GLOBAL_CONTEXT)
|
||||||
|
|
|
@ -1,9 +1,16 @@
|
||||||
from django.apps import AppConfig
|
from django.apps import AppConfig
|
||||||
|
|
||||||
|
|
||||||
class CoreConfig(AppConfig):
|
class CoreAppConfig(AppConfig):
|
||||||
name = 'core'
|
name = 'core'
|
||||||
|
|
||||||
|
# label = 'Archive Data'
|
||||||
|
verbose_name = "Archive Data"
|
||||||
|
|
||||||
|
# WIP: broken by Django 3.1.2 -> 4.0 migration
|
||||||
|
# default_auto_field = 'django.db.models.UUIDField'
|
||||||
|
|
||||||
|
|
||||||
def ready(self):
|
def ready(self):
|
||||||
from .auth import register_signals
|
from .auth import register_signals
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
import os
|
__package__ = 'archivebox.core'
|
||||||
from django.conf import settings
|
|
||||||
|
|
||||||
|
|
||||||
from ..config import (
|
from ..config import (
|
||||||
LDAP
|
LDAP
|
||||||
)
|
)
|
||||||
|
|
|
@ -50,7 +50,7 @@ class Tag(models.Model):
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
verbose_name = "Tag"
|
verbose_name = "Tag"
|
||||||
verbose_name_plural = "Tags"
|
verbose_name_plural = "🏷️ Tags"
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.name
|
return self.name
|
||||||
|
@ -98,6 +98,10 @@ class Snapshot(models.Model):
|
||||||
|
|
||||||
keys = ('url', 'timestamp', 'title', 'tags', 'updated')
|
keys = ('url', 'timestamp', 'title', 'tags', 'updated')
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
verbose_name = "Snapshot"
|
||||||
|
verbose_name_plural = "⭐️ Archived Webpages (Snapshots)"
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
title = self.title or '-'
|
title = self.title or '-'
|
||||||
return f'[{self.timestamp}] {self.url[:64]} ({title[:64]})'
|
return f'[{self.timestamp}] {self.url[:64]} ({title[:64]})'
|
||||||
|
@ -282,5 +286,9 @@ class ArchiveResult(models.Model):
|
||||||
|
|
||||||
objects = ArchiveResultManager()
|
objects = ArchiveResultManager()
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
verbose_name = "ArchiveResult"
|
||||||
|
verbose_name_plural = "📑 Logs (ArchiveResults)"
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.extractor
|
return self.extractor
|
||||||
|
|
|
@ -1,5 +1,10 @@
|
||||||
__package__ = 'archivebox.core'
|
__package__ = 'archivebox.core'
|
||||||
|
|
||||||
|
# TODO: add this after we upgrade to Django >=3.2
|
||||||
|
# https://github.com/typeddjango/django-stubs
|
||||||
|
# import django_stubs_ext
|
||||||
|
# django_stubs_ext.monkeypatch()
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import re
|
import re
|
||||||
|
@ -59,13 +64,88 @@ INSTALLED_APPS = [
|
||||||
'django.contrib.messages',
|
'django.contrib.messages',
|
||||||
'django.contrib.staticfiles',
|
'django.contrib.staticfiles',
|
||||||
'django.contrib.admin',
|
'django.contrib.admin',
|
||||||
|
'solo',
|
||||||
|
|
||||||
|
|
||||||
'core',
|
'core',
|
||||||
'api',
|
'api',
|
||||||
|
|
||||||
|
# Plugins
|
||||||
|
|
||||||
|
'plugins.defaults',
|
||||||
|
'plugins.system',
|
||||||
|
# 'plugins.replaywebpage', # provides UI to view WARC files
|
||||||
|
# 'plugins.gallerydl', # provides gallerydl dependency + extractor
|
||||||
|
# 'plugins.browsertrix', # provides browsertrix dependency + extractor
|
||||||
|
# 'plugins.playwright', # provides playwright dependency
|
||||||
|
# ...
|
||||||
|
# someday we may have enough plugins to justify dynamic loading:
|
||||||
|
# *(path.parent.name for path in (Path(PACKAGE_DIR) / 'plugins').glob('*/apps.py')),,
|
||||||
|
|
||||||
'django_extensions',
|
'django_extensions',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
### Staticfile and Template Settings
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
STATIC_URL = '/static/'
|
||||||
|
|
||||||
|
STATIC_ROOT = Path(PACKAGE_DIR) / 'collected_static'
|
||||||
|
|
||||||
|
STATICFILES_DIRS = [
|
||||||
|
*([str(CUSTOM_TEMPLATES_DIR / 'static')] if CUSTOM_TEMPLATES_DIR else []),
|
||||||
|
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'static'),
|
||||||
|
|
||||||
|
# Plugins
|
||||||
|
# str(Path(PACKAGE_DIR) / 'plugins/defaults/static'),
|
||||||
|
# str(Path(PACKAGE_DIR) / 'plugins/replaywebpage/static'),
|
||||||
|
# str(Path(PACKAGE_DIR) / 'plugins/gallerydl/static'),
|
||||||
|
# str(Path(PACKAGE_DIR) / 'plugins/browsertrix/static'),
|
||||||
|
# str(Path(PACKAGE_DIR) / 'plugins/playwright/static'),
|
||||||
|
# ...
|
||||||
|
# someday if there are many more plugins / user-addable plugins:
|
||||||
|
# *(str(path) for path in (Path(PACKAGE_DIR) / 'plugins').glob('*/static')),
|
||||||
|
]
|
||||||
|
|
||||||
|
MEDIA_URL = '/archive/'
|
||||||
|
MEDIA_ROOT = OUTPUT_DIR / 'archive'
|
||||||
|
|
||||||
|
|
||||||
|
TEMPLATE_DIRS = [
|
||||||
|
*([str(CUSTOM_TEMPLATES_DIR)] if CUSTOM_TEMPLATES_DIR else []),
|
||||||
|
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'core'),
|
||||||
|
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'admin'),
|
||||||
|
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME),
|
||||||
|
|
||||||
|
# Plugins
|
||||||
|
# added by plugins.<PluginName>.apps.<AppName>.ready -> .settings.register_plugin_settings
|
||||||
|
# str(Path(PACKAGE_DIR) / 'plugins/defaults/templates'),
|
||||||
|
# str(Path(PACKAGE_DIR) / 'plugins/replaywebpage/templates'),
|
||||||
|
# str(Path(PACKAGE_DIR) / 'plugins/gallerydl/templates'),
|
||||||
|
# str(Path(PACKAGE_DIR) / 'plugins/browsertrix/templates'),
|
||||||
|
# str(Path(PACKAGE_DIR) / 'plugins/playwright/templates'),
|
||||||
|
# ...
|
||||||
|
#
|
||||||
|
# someday if there are many more plugins / user-addable plugins:
|
||||||
|
# *(str(path) for path in (Path(PACKAGE_DIR) / 'plugins').glob('*/templates')),
|
||||||
|
]
|
||||||
|
|
||||||
|
TEMPLATES = [
|
||||||
|
{
|
||||||
|
'BACKEND': 'django.template.backends.django.DjangoTemplates',
|
||||||
|
'DIRS': TEMPLATE_DIRS,
|
||||||
|
'APP_DIRS': True,
|
||||||
|
'OPTIONS': {
|
||||||
|
'context_processors': [
|
||||||
|
'django.template.context_processors.debug',
|
||||||
|
'django.template.context_processors.request',
|
||||||
|
'django.contrib.auth.context_processors.auth',
|
||||||
|
'django.contrib.messages.context_processors.messages',
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
# For usage with https://www.jetadmin.io/integrations/django
|
# For usage with https://www.jetadmin.io/integrations/django
|
||||||
# INSTALLED_APPS += ['jet_django']
|
# INSTALLED_APPS += ['jet_django']
|
||||||
|
@ -163,7 +243,7 @@ if DEBUG_TOOLBAR:
|
||||||
'debug_toolbar.panels.request.RequestPanel',
|
'debug_toolbar.panels.request.RequestPanel',
|
||||||
'debug_toolbar.panels.sql.SQLPanel',
|
'debug_toolbar.panels.sql.SQLPanel',
|
||||||
'debug_toolbar.panels.staticfiles.StaticFilesPanel',
|
'debug_toolbar.panels.staticfiles.StaticFilesPanel',
|
||||||
# 'debug_toolbar.panels.templates.TemplatesPanel',
|
# 'debug_toolbar.panels.templates.TemplatesPanel', # buggy/slow
|
||||||
'debug_toolbar.panels.cache.CachePanel',
|
'debug_toolbar.panels.cache.CachePanel',
|
||||||
'debug_toolbar.panels.signals.SignalsPanel',
|
'debug_toolbar.panels.signals.SignalsPanel',
|
||||||
'debug_toolbar.panels.logging.LoggingPanel',
|
'debug_toolbar.panels.logging.LoggingPanel',
|
||||||
|
@ -173,39 +253,6 @@ if DEBUG_TOOLBAR:
|
||||||
]
|
]
|
||||||
MIDDLEWARE = [*MIDDLEWARE, 'debug_toolbar.middleware.DebugToolbarMiddleware']
|
MIDDLEWARE = [*MIDDLEWARE, 'debug_toolbar.middleware.DebugToolbarMiddleware']
|
||||||
|
|
||||||
################################################################################
|
|
||||||
### Staticfile and Template Settings
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
STATIC_URL = '/static/'
|
|
||||||
|
|
||||||
STATICFILES_DIRS = [
|
|
||||||
*([str(CUSTOM_TEMPLATES_DIR / 'static')] if CUSTOM_TEMPLATES_DIR else []),
|
|
||||||
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'static'),
|
|
||||||
]
|
|
||||||
|
|
||||||
TEMPLATE_DIRS = [
|
|
||||||
*([str(CUSTOM_TEMPLATES_DIR)] if CUSTOM_TEMPLATES_DIR else []),
|
|
||||||
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'core'),
|
|
||||||
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'admin'),
|
|
||||||
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME),
|
|
||||||
]
|
|
||||||
|
|
||||||
TEMPLATES = [
|
|
||||||
{
|
|
||||||
'BACKEND': 'django.template.backends.django.DjangoTemplates',
|
|
||||||
'DIRS': TEMPLATE_DIRS,
|
|
||||||
'APP_DIRS': True,
|
|
||||||
'OPTIONS': {
|
|
||||||
'context_processors': [
|
|
||||||
'django.template.context_processors.debug',
|
|
||||||
'django.template.context_processors.request',
|
|
||||||
'django.contrib.auth.context_processors.auth',
|
|
||||||
'django.contrib.messages.context_processors.messages',
|
|
||||||
],
|
|
||||||
},
|
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
|
@ -312,21 +359,21 @@ IGNORABLE_404_URLS = [
|
||||||
]
|
]
|
||||||
|
|
||||||
class NoisyRequestsFilter(logging.Filter):
|
class NoisyRequestsFilter(logging.Filter):
|
||||||
def filter(self, record):
|
def filter(self, record) -> bool:
|
||||||
logline = record.getMessage()
|
logline = record.getMessage()
|
||||||
|
|
||||||
# ignore harmless 404s for the patterns in IGNORABLE_404_URLS
|
# ignore harmless 404s for the patterns in IGNORABLE_404_URLS
|
||||||
for ignorable_url_pattern in IGNORABLE_404_URLS:
|
for ignorable_url_pattern in IGNORABLE_404_URLS:
|
||||||
ignorable_log_pattern = re.compile(f'^"GET /.*/?{ignorable_url_pattern.pattern[:-1]} HTTP/.*" (200|30.|404) .+$', re.I | re.M)
|
ignorable_log_pattern = re.compile(f'^"GET /.*/?{ignorable_url_pattern.pattern[:-1]} HTTP/.*" (200|30.|404) .+$', re.I | re.M)
|
||||||
if ignorable_log_pattern.match(logline):
|
if ignorable_log_pattern.match(logline):
|
||||||
return 0
|
return False
|
||||||
|
|
||||||
# ignore staticfile requests that 200 or 30*
|
# ignore staticfile requests that 200 or 30*
|
||||||
ignoreable_200_log_pattern = re.compile(r'"GET /static/.* HTTP/.*" (200|30.) .+', re.I | re.M)
|
ignoreable_200_log_pattern = re.compile(r'"GET /static/.* HTTP/.*" (200|30.) .+', re.I | re.M)
|
||||||
if ignoreable_200_log_pattern.match(logline):
|
if ignoreable_200_log_pattern.match(logline):
|
||||||
return 0
|
return False
|
||||||
|
|
||||||
return 1
|
return True
|
||||||
|
|
||||||
if LOGS_DIR.exists():
|
if LOGS_DIR.exists():
|
||||||
ERROR_LOG = (LOGS_DIR / 'errors.log')
|
ERROR_LOG = (LOGS_DIR / 'errors.log')
|
||||||
|
|
|
@ -32,6 +32,10 @@ urlpatterns = [
|
||||||
|
|
||||||
path('archive/', RedirectView.as_view(url='/')),
|
path('archive/', RedirectView.as_view(url='/')),
|
||||||
path('archive/<path:path>', SnapshotView.as_view(), name='Snapshot'),
|
path('archive/<path:path>', SnapshotView.as_view(), name='Snapshot'),
|
||||||
|
path('web/<path:path>', SnapshotView.as_view()), # support archive.org-style URLs
|
||||||
|
|
||||||
|
path('plugins/replaywebpage/', include('plugins.replaywebpage.urls')),
|
||||||
|
# ... dynamic load these someday if there are more of them
|
||||||
|
|
||||||
path('admin/core/snapshot/add/', RedirectView.as_view(url='/add/')),
|
path('admin/core/snapshot/add/', RedirectView.as_view(url='/add/')),
|
||||||
path('add/', AddView.as_view(), name='add'),
|
path('add/', AddView.as_view(), name='add'),
|
||||||
|
|
|
@ -56,12 +56,18 @@ class SnapshotView(View):
|
||||||
slug, archivefile = path.split('/', 1)[0], 'index.html'
|
slug, archivefile = path.split('/', 1)[0], 'index.html'
|
||||||
|
|
||||||
# slug is a timestamp
|
# slug is a timestamp
|
||||||
if slug.replace('.','').isdigit():
|
if slug.replace('.', '').isdigit():
|
||||||
|
|
||||||
# missing trailing slash -> redirect to index
|
# missing trailing slash -> redirect to index
|
||||||
if '/' not in path:
|
if '/' not in path:
|
||||||
return redirect(f'{path}/index.html')
|
return redirect(f'{path}/index.html')
|
||||||
|
|
||||||
|
# TODO: add support for archive.org-style URLs where timestamp may be a human-readable date
|
||||||
|
# https://web.archivebox.io / web / 2022-01 / https://example.com
|
||||||
|
# https://web.archivebox.io / web / 20220505103616 / https://example.com
|
||||||
|
# https://web.archivebox.io / web / 2022-05-05__0:36:16 / https://example.com
|
||||||
|
# use archivebox.util.parse_date (supports unix timestamps, iso date strings, and lots more etc.)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
try:
|
try:
|
||||||
snapshot = Snapshot.objects.get(Q(timestamp=slug) | Q(id__startswith=slug))
|
snapshot = Snapshot.objects.get(Q(timestamp=slug) | Q(id__startswith=slug))
|
||||||
|
|
|
@ -7,7 +7,7 @@ if __name__ == '__main__':
|
||||||
# versions of ./manage.py commands whenever possible. When that's not possible
|
# versions of ./manage.py commands whenever possible. When that's not possible
|
||||||
# (e.g. makemigrations), you can comment out this check temporarily
|
# (e.g. makemigrations), you can comment out this check temporarily
|
||||||
|
|
||||||
if not ('makemigrations' in sys.argv or 'migrate' in sys.argv):
|
if not ('makemigrations' in sys.argv or 'migrate' in sys.argv or 'collectstatic' in sys.argv):
|
||||||
print("[X] Don't run ./manage.py directly (unless you are a developer running makemigrations):")
|
print("[X] Don't run ./manage.py directly (unless you are a developer running makemigrations):")
|
||||||
print()
|
print()
|
||||||
print(' Hint: Use these archivebox CLI commands instead of the ./manage.py equivalents:')
|
print(' Hint: Use these archivebox CLI commands instead of the ./manage.py equivalents:')
|
||||||
|
|
|
@ -1,3 +0,0 @@
|
||||||
[mypy]
|
|
||||||
plugins =
|
|
||||||
mypy_django_plugin.main
|
|
3
archivebox/plugins/__init__.py
Normal file
3
archivebox/plugins/__init__.py
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
__package__ = 'archivebox.plugins'
|
||||||
|
|
||||||
|
|
3
archivebox/plugins/defaults/__init__.py
Normal file
3
archivebox/plugins/defaults/__init__.py
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
__package__ = 'archivebox.plugins.defaults'
|
||||||
|
|
||||||
|
default_app_config = 'plugins.defaults.apps.DefaultsPluginAppConfig'
|
20
archivebox/plugins/defaults/admin.py
Normal file
20
archivebox/plugins/defaults/admin.py
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
from django.contrib import admin
|
||||||
|
from solo.admin import SingletonModelAdmin
|
||||||
|
|
||||||
|
from .models import (
|
||||||
|
ArchiveBoxDefaultDependency,
|
||||||
|
ArchiveBoxDefaultExtractor,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DependencyAdmin(SingletonModelAdmin):
|
||||||
|
readonly_fields = ('id', 'NAME', 'LABEL', 'REQUIRED', 'bin_path', 'bin_version', 'is_valid', 'is_enabled')
|
||||||
|
|
||||||
|
class ExtractorAdmin(SingletonModelAdmin):
|
||||||
|
readonly_fields = ('id', 'NAME', 'LABEL', 'DEFAULT_ENABLED', 'DEFAULT_CMD', 'CMD', 'ARGS', 'TIMEOUT', 'dependency', 'is_valid', 'is_enabled')
|
||||||
|
|
||||||
|
print('DefaultsPluginConfig.admin')
|
||||||
|
|
||||||
|
|
||||||
|
admin.site.register(ArchiveBoxDefaultDependency, DependencyAdmin)
|
||||||
|
admin.site.register(ArchiveBoxDefaultExtractor, ExtractorAdmin)
|
24
archivebox/plugins/defaults/apps.py
Normal file
24
archivebox/plugins/defaults/apps.py
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
# __package__ = 'archivebox.plugins.defaults'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
from django.apps import AppConfig
|
||||||
|
|
||||||
|
|
||||||
|
class DefaultsPluginAppConfig(AppConfig):
|
||||||
|
name = "plugins.defaults"
|
||||||
|
|
||||||
|
# label = "ArchiveBox Defaults"
|
||||||
|
verbose_name = "Plugin Configuration Defaults"
|
||||||
|
|
||||||
|
default_auto_field = "django.db.models.AutoField"
|
||||||
|
|
||||||
|
def ready(self):
|
||||||
|
print('plugins.defaults.apps.DefaultsPluginConfig.ready')
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from .settings import register_plugin_settings
|
||||||
|
|
||||||
|
register_plugin_settings(settings, name=self.name)
|
||||||
|
|
41
archivebox/plugins/defaults/migrations/0001_initial.py
Normal file
41
archivebox/plugins/defaults/migrations/0001_initial.py
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
# Generated by Django 3.1.14 on 2024-01-24 08:56
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
initial = True
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.CreateModel(
|
||||||
|
name='ArchiveBoxDefaultDependency',
|
||||||
|
fields=[
|
||||||
|
('ENABLED', models.BooleanField(default=True, editable=False)),
|
||||||
|
('BINARY', models.CharField(default='/bin/false', max_length=255)),
|
||||||
|
('ARGS', models.CharField(default='', max_length=255)),
|
||||||
|
('id', models.AutoField(default=1, primary_key=True, serialize=False)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
'verbose_name': 'Dependency Configuration Defaults',
|
||||||
|
'abstract': False,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
migrations.CreateModel(
|
||||||
|
name='ArchiveBoxDefaultExtractor',
|
||||||
|
fields=[
|
||||||
|
('ENABLED', models.BooleanField(default=True)),
|
||||||
|
('CMD', models.CharField(default=['{DEPENDENCY.BINARY}', '{ARGS}', '{url}'], max_length=255)),
|
||||||
|
('ARGS', models.CharField(default=['--timeout={TIMEOUT}'], max_length=255)),
|
||||||
|
('TIMEOUT', models.CharField(default='{TIMEOUT}', max_length=255)),
|
||||||
|
('id', models.AutoField(default=1, primary_key=True, serialize=False)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
'verbose_name': 'Extractor Configuration Defaults',
|
||||||
|
'abstract': False,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
]
|
|
@ -0,0 +1,31 @@
|
||||||
|
# Generated by Django 3.1.14 on 2024-01-24 09:43
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Second migration for the ``defaults`` plugin app.

    Renames the human-readable model names, changes the default ``BINARY``
    to ``/bin/bash`` and redefines ``ENABLED`` on the default dependency
    model without ``editable=False``.  Auto-generated by Django.
    """

    dependencies = [
        ('defaults', '0001_initial'),
    ]

    operations = [
        # admin-facing display names
        migrations.AlterModelOptions(
            name='archiveboxdefaultdependency',
            options={'verbose_name': 'Default Configuration: Dependencies'},
        ),
        migrations.AlterModelOptions(
            name='archiveboxdefaultextractor',
            options={'verbose_name': 'Default Configuration: Extractors'},
        ),
        # field changes on the default dependency model
        migrations.AlterField(
            model_name='archiveboxdefaultdependency',
            name='BINARY',
            field=models.CharField(default='/bin/bash', max_length=255),
        ),
        migrations.AlterField(
            model_name='archiveboxdefaultdependency',
            name='ENABLED',
            field=models.BooleanField(default=True),
        ),
    ]
|
0
archivebox/plugins/defaults/migrations/__init__.py
Normal file
0
archivebox/plugins/defaults/migrations/__init__.py
Normal file
385
archivebox/plugins/defaults/models.py
Normal file
385
archivebox/plugins/defaults/models.py
Normal file
|
@ -0,0 +1,385 @@
|
||||||
|
__package__ = 'archivebox.plugins.defaults'
|
||||||
|
|
||||||
|
# import shutil
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from typing import List, Dict, Any
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.db import models, transaction
|
||||||
|
from django.utils.functional import cached_property
|
||||||
|
|
||||||
|
from solo.models import SingletonModel # type: ignore[import-untyped]
|
||||||
|
|
||||||
|
|
||||||
|
from config import bin_path, bin_version
|
||||||
|
|
||||||
|
ConfigDict = Dict[str, Any]
|
||||||
|
|
||||||
|
|
||||||
|
# def bin_path(binary: str) -> str | None:
|
||||||
|
# return shutil.which(str(Path(binary).expanduser())) or shutil.which(str(binary)) or binary
|
||||||
|
|
||||||
|
# def bin_version(bin_path: str, cmd: str | None=None) -> str | None:
|
||||||
|
# return '0.0.0'
|
||||||
|
|
||||||
|
# def pretty_path(path: Path) -> str:
|
||||||
|
# """take a Path object and return the path as a string relative to the current directory"""
|
||||||
|
|
||||||
|
# if not path:
|
||||||
|
# return ''
|
||||||
|
|
||||||
|
# return str(path.expanduser().resolve().relative_to(Path.cwd().resolve()))
|
||||||
|
|
||||||
|
|
||||||
|
class ArchiveBoxBaseDependency(models.Model):
    """Abstract base model describing an external binary ArchiveBox depends on.

    Subclasses override the upper-case class constants (``NAME``, ``LABEL``,
    the ``*_DEPENDENCIES`` lists and the ``DEFAULT_*`` values) and may
    redefine the stored fields ``ENABLED``, ``BINARY`` and ``ARGS``.
    """

    singleton_instance_id = 1

    # primary key defaults to the singleton id
    id = models.AutoField(default=singleton_instance_id, primary_key=True)

    NAME = 'DEFAULT'
    LABEL = "Default"
    REQUIRED = False

    PARENT_DEPENDENCIES: List[str] = []

    # packages handed to the matching *EnvironmentDependency in install_self()
    BIN_DEPENDENCIES: List[str] = []
    APT_DEPENDENCIES: List[str] = []
    BREW_DEPENDENCIES: List[str] = []
    PIP_DEPENDENCIES: List[str] = []
    NPM_DEPENDENCIES: List[str] = []

    DEFAULT_BINARY: str | None = '/bin/bash'
    DEFAULT_START_CMD: str | None = '/bin/bash -c "while true; do sleep 1; done"'
    DEFAULT_PID_FILE: str | None = 'logs/{NAME}_WORKER.pid'
    DEFAULT_STOP_CMD: str | None = 'kill "$(<{PID_FILE})"'
    DEFAULT_VERSION_COMMAND: str | None = '{BINARY} --version'
    DEFAULT_ARGS: str | None = ''

    VERSION_CMD = '{BINARY} --version'

    ENABLED = models.BooleanField(default=True, editable=False)
    BINARY = models.CharField(max_length=255, default=DEFAULT_BINARY)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS)

    # START_CMD = models.CharField(max_length=255, default=DEFAULT_START_CMD)
    # WORKERS = models.IntegerField(default=1)

    class Meta:
        abstract = True
        app_label = 'defaults'

    def __str__(self):
        return f"{self.LABEL} Dependency Configuration"

    def __json__(self):
        """Return a plain-dict snapshot of this dependency's configuration."""
        return {
            'type': 'ArchiveBoxDependency',
            '__class__': self.__class__.__name__,
            'NAME': self.NAME,
            'LABEL': self.LABEL,
            'ENABLED': self.ENABLED,
            'BINARY': self.BINARY,
            'ARGS': self.ARGS,
            # 'START_CMD': self.START_CMD,
            # 'WORKERS': self.WORKERS,
        }

    @cached_property
    def bin_path(self) -> str:
        """Resolve the configured binary via the module-level ``bin_path`` helper."""
        # inside a method the bare name refers to the imported helper, not this property
        return bin_path(self.BINARY or self.DEFAULT_BINARY)

    @cached_property
    def bin_version(self) -> str | None:
        """Return the version reported by the module-level ``bin_version`` helper."""
        print(f'ArchiveBoxBaseDependency.bin_version({self.bin_path}, cmd={self.VERSION_CMD.format(BINARY=self.BINARY)})')
        return bin_version(self.bin_path, cmd=self.VERSION_CMD.format(BINARY=self.BINARY))
        # return bin_version(self.bin_path, cmd=self.VERSION_CMD)

    @cached_property
    def is_valid(self) -> bool:
        """True when both a binary path and a version could be determined."""
        return bool(self.bin_path and self.bin_version)

    @cached_property
    def is_enabled(self) -> bool:
        """True when the dependency is switched on and valid."""
        return bool(self.ENABLED and self.is_valid)

    @cached_property
    def pretty_version(self) -> str:
        """Return a one-line, colourised status summary for this dependency."""
        # Branch on ENABLED rather than is_enabled: is_enabled already implies
        # is_valid, which made the red "invalid" branch unreachable and reported
        # enabled-but-broken binaries as "disabled".
        if not self.ENABLED:
            color, symbol, note, version = 'lightyellow', '-', 'disabled', '-'
        elif self.is_valid:
            color, symbol, note, version = 'green', '√', 'valid', ''

            parsed_version_num = re.search(r'[\d\.]+', self.bin_version)
            if parsed_version_num:
                version = f'v{parsed_version_num[0]}'
        else:
            color, symbol, note, version = 'red', 'X', 'invalid', '?'

        # NOTE(review): pretty_path and ANSI are not imported in this module
        # (pretty_path only exists as commented-out code above) -- confirm
        # where they should come from.
        path = pretty_path(self.bin_path)

        return ' '.join((
            ANSI[color],
            symbol,
            ANSI['reset'],
            self.NAME.ljust(21),   # was the undefined bare name `name`
            version.ljust(14),
            ANSI[color],
            note.ljust(8),
            ANSI['reset'],
            path.ljust(76),
        ))

    # @helper
    def install_parents(self, config):
        """Return a mapping of each parent dependency to itself (install is stubbed out)."""
        return {
            # parent_dependency.NAME: parent_dependency.get_solo().install_self()
            parent_dependency: parent_dependency
            for parent_dependency in self.PARENT_DEPENDENCIES
        }

    # @helper
    def install_self(self, config):
        """Install parents and all package lists, then return the detected version."""
        assert all(self.install_parents(config=config).values())

        # NOTE(review): the *EnvironmentDependency classes are not defined or
        # imported in this module -- confirm where they live.
        BashEnvironmentDependency.get_solo().install_pkgs(self.BIN_DEPENDENCIES)
        AptEnvironmentDependency.get_solo().install_pkgs(self.APT_DEPENDENCIES)
        BrewEnvironmentDependency.get_solo().install_pkgs(self.BREW_DEPENDENCIES)
        PipEnvironmentDependency.get_solo().install_pkgs(self.PIP_DEPENDENCIES)
        NPMEnvironmentDependency.get_solo().install_pkgs(self.NPM_DEPENDENCIES)

        assert self.is_valid
        return self.bin_version

    # @task
    def run(self, args, pwd=None, timeout=None, cwd=None):
        """Run the dependency binary with ``args`` and report the outcome.

        ``cwd`` is accepted as an alias for ``pwd`` because the extractors in
        this package call ``run(cmd, cwd=..., timeout=...)``.

        Returns a ``(proc, timer, errors)`` tuple; ``proc`` is ``None`` and
        ``errors`` holds the exception when the command could not be run.
        """
        if pwd is None:
            pwd = cwd

        # initialise proc so the return below cannot hit an unbound local
        proc, errors = None, None
        timer = TimedProgress(timeout, prefix=' ')
        try:
            # NOTE(review): the bare `run` is a module-level helper (not this
            # method) and, like TimedProgress, is not imported here -- confirm.
            proc = run(cmd=[self.bin_path, *args], pwd=pwd, timeout=timeout)
        except Exception as err:
            errors = err
        finally:
            timer.end()

        return proc, timer, errors
|
||||||
|
|
||||||
|
class ArchiveBoxDefaultDependency(ArchiveBoxBaseDependency, SingletonModel):
    """Concrete singleton model storing the default dependency configuration."""

    singleton_instance_id = 1

    id = models.AutoField(primary_key=True, default=singleton_instance_id)

    # redefined so the flag is editable (the abstract base declares editable=False)
    ENABLED = models.BooleanField(editable=True, default=True)

    class Meta:  # pyright: ignore [reportIncompatibleVariableOverride]
        verbose_name = 'Default Configuration: Dependencies'
        app_label = 'defaults'
        abstract = False
|
||||||
|
|
||||||
|
|
||||||
|
class ArchiveBoxBaseExtractor(models.Model):
    """Abstract base model for an extractor that runs a dependency against a URL.

    Subclasses set ``NAME``/``LABEL``, point ``DEPENDENCY`` at a dependency
    model class and may override the ``DEFAULT_*`` templates and fields.
    """

    singleton_instance_id = 1

    id = models.AutoField(default=singleton_instance_id, primary_key=True)

    NAME = 'DEFAULT'
    LABEL = 'Default'

    # dependency model *class*; the row is fetched lazily by `dependency` below
    DEFAULT_DEPENDENCY = ArchiveBoxDefaultDependency
    DEPENDENCY = DEFAULT_DEPENDENCY

    DEFAULT_ENABLED = True
    DEFAULT_CMD = ['{DEPENDENCY.BINARY}', '{ARGS}', '{url}']
    DEFAULT_ARGS = ['--timeout={TIMEOUT}']
    DEFAULT_TIMEOUT = '{TIMEOUT}'
    # DEFAULT_USER_AGENT = '{USER_AGENT}'
    # DEFAULT_COOKIES_TXT = '{COOKIES_TXT}'

    ENABLED = models.BooleanField(default=DEFAULT_ENABLED, editable=True)

    CMD = models.CharField(max_length=255, default=DEFAULT_CMD)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS)
    TIMEOUT = models.CharField(max_length=255, default=DEFAULT_TIMEOUT)

    ALIASES = {
        'ENABLED': (f'SAVE_{NAME}', f'USE_{NAME}', f'FETCH_{NAME}'),
    }

    def __str__(self):
        return f"{self.LABEL} Extractor Configuration"

    class Meta:  # pyright: ignore [reportIncompatibleVariableOverride]
        abstract = True
        verbose_name = "Default Extractor Configuration"
        app_label = 'defaults'

    @cached_property
    def dependency(self):
        """Return the singleton row of the configured ``DEPENDENCY`` model."""
        return self.DEPENDENCY.get_solo()

    def __json__(self):
        """Return a plain-dict snapshot of this extractor and its dependency."""
        return {
            'type': 'ArchiveBoxExtractor',
            '__class__': self.__class__.__name__,
            'NAME': self.NAME,
            'LABEL': self.LABEL,
            'ENABLED': self.ENABLED,
            'DEPENDENCY': self.dependency.__json__(),
            'ARGS': self.ARGS,
            'CMD': self.CMD,
            'TIMEOUT': self.TIMEOUT,
            'is_valid': self.is_valid,
            'is_enabled': self.is_enabled,
        }

    def format_args(self, csv: List[str], **config):
        """Format every template in ``csv`` against config plus this extractor's values.

        Later sources win: caller ``config`` < un-prefixed extractor values
        < ``<NAME>_``-prefixed extractor values.
        """
        un_prefixed_config = {**self.__json__()}          # e.g. ENABLED=True
        prefixed_config = {                               # e.g. GALLERYDL_ENABLED=True
            f'{self.NAME}_{key}': value
            for key, value in un_prefixed_config.items()
        }

        merged_config = {
            **config,                   # e.g. TIMEOUT=60
            **un_prefixed_config,       # e.g. ENABLED=True
            **prefixed_config,          # e.g. GALLERYDL_ENABLED=True
        }
        formatted_config = [
            arg.format(**merged_config)
            for arg in csv
        ]

        return formatted_config

    @cached_property
    def is_valid(self):
        """True when the underlying dependency is valid."""
        if not self.dependency.is_valid:
            return False

        # TIMEOUT must be at least 5 seconds
        # if self.TIMEOUT < 5:
        #     return False

        # assert Path(self.COOKIES_TXT).exists()
        # TODO: validate user agent with uaparser
        # TODO: validate args, cookies.txt?
        return True

    @cached_property
    def is_enabled(self):
        """Truthy when this extractor and its dependency are both enabled and valid."""
        return self.ENABLED and self.is_valid and self.dependency.is_enabled

    def save(self, *args, **kwargs):
        """Save inside a transaction, then print a change event."""
        # assert self.is_valid

        with transaction.atomic():
            result = super().save(*args, **kwargs)
            # post to message bus:
            print({
                'type': f'{self.__class__.__name__}.save',
                'diff': self.__json__(),
                'kwargs': kwargs,
            })
            # potential consumers of this event:
            #    - event logger: write to events.log
            #    - config file updater: writes to ArchiveBox.conf
            #    - supervisor: restarts relevant dependencies/extractors
            #    - etc...

        return result

    def out_dir(self, url: str, snapshot_dir: Path, config: ConfigDict):
        """Return the output directory for this extractor inside ``snapshot_dir``."""
        return (snapshot_dir / self.NAME)

    def create_out_dir(self, url: str, snapshot_dir: Path, config: ConfigDict):
        """Create the extractor output directory if needed and return its path."""
        out_dir = self.out_dir(url=url, snapshot_dir=snapshot_dir, config=config)
        # mkdir() returns None, so hand back the path the caller actually needs
        out_dir.mkdir(exist_ok=True)
        return out_dir

    def should_extract(self, url: str, snapshot_dir: Path, config: ConfigDict):
        """Return True when the extractor is enabled, has no output yet and can write."""
        import os  # local import: `os` is not part of this module's import block

        # return False if extractor is disabled
        if not self.is_enabled:
            return False

        out_dir = self.out_dir(url=url, snapshot_dir=snapshot_dir, config=config)

        # Path.glob() returns a generator, which is always truthy; any() checks
        # whether it actually yields an entry.
        if any(out_dir.glob('*')):
            return False

        if not os.access(out_dir, os.W_OK | os.X_OK):
            return False

        return True

    def get_dependency_cmd(self, url: str, extractor_dir: Path, config: ConfigDict):
        """Build the flat argument list passed to the dependency's ``run()``."""
        # NOTE(review): the default CMD template already contains '{ARGS}' and
        # '{url}', which are appended again below -- confirm the intended layout.
        return [
            # unpack: format_args() returns a list and argv must stay flat
            *self.format_args(self.CMD, **config),
            url,
            *self.format_args(self.ARGS, **config),   # TODO: split and requote this properly
        ]

    # @requires_config('HOSTNAME', 'TIMEOUT', 'USER_AGENT', 'CHECK_SSL_VALIDITY')
    def extract(self, url: str, snapshot_dir: Path, config: ConfigDict):
        """Run the dependency for ``url`` and return an ``ArchiveResult``.

        Returns ``None`` without doing anything when the extractor is disabled.
        """
        if not self.ENABLED:
            return

        # create_extractor_directory() does not exist on this class
        extractor_dir = self.create_out_dir(url=url, snapshot_dir=snapshot_dir, config=config)

        cmd = self.get_dependency_cmd(url=url, extractor_dir=extractor_dir, config=config)

        status, stdout, stderr, output_path = 'failed', '', '', None
        timer = None   # stays None if run() itself raises before returning
        try:
            proc, timer, errors = self.dependency.run(cmd, pwd=extractor_dir, timeout=self.TIMEOUT)
            if errors is not None:
                # surface the real failure instead of tripping over proc=None
                raise errors
            stdout, stderr = proc.stdout, proc.stderr

            if 'ERROR: Unsupported URL' in stderr:
                hints = ('gallery-dl doesnt support this type of url yet',)
                raise ArchiveError('Failed to save gallerydl', hints)

            if proc.returncode == 0 and 'finished' in stdout:
                output_path = extractor_dir / 'index.html'
                status = 'succeeded'
        except Exception as err:
            # str += Exception raises TypeError; append the message text
            stderr += str(err)

        # NOTE(review): get_dir_size, ArchiveResult and ArchiveError are not
        # imported in this module, and config is annotated as a plain Dict but
        # read via attribute access below -- confirm.
        num_bytes, num_dirs, num_files = get_dir_size(extractor_dir)

        return ArchiveResult(
            cmd=cmd,
            pwd=str(extractor_dir),   # was the undefined name `out_dir`
            cmd_version=self.dependency.bin_version,
            cmd_path=self.dependency.bin_path,
            cmd_hostname=config.HOSTNAME,

            output_path=output_path,
            stdout=stdout,
            stderr=stderr,
            status=status,

            num_bytes=num_bytes,
            num_files=num_files,
            num_dirs=num_dirs,
            **(timer.stats if timer is not None else {}),
        )
|
||||||
|
|
||||||
|
|
||||||
|
class ArchiveBoxDefaultExtractor(ArchiveBoxBaseExtractor, SingletonModel):
    """Concrete singleton model storing the default extractor configuration."""

    singleton_instance_id = 1

    id = models.AutoField(primary_key=True, default=singleton_instance_id)

    # dependency model class whose singleton row this extractor runs
    DEPENDENCY = ArchiveBoxDefaultDependency

    ENABLED = models.BooleanField(editable=True, default=True)

    class Meta:
        verbose_name = 'Default Configuration: Extractors'
        app_label = 'defaults'
        abstract = False
|
15
archivebox/plugins/defaults/settings.py
Normal file
15
archivebox/plugins/defaults/settings.py
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
def register_plugin_settings(settings=settings, name='defaults'):
    """Append a plugin's static and template directories to the settings.

    Args:
        settings: settings object exposing ``PACKAGE_DIR``,
            ``STATICFILES_DIRS`` and ``TEMPLATE_DIRS``.
        name: plugin directory name under ``plugins/``.
    """
    package_dir = Path(settings.PACKAGE_DIR)

    static_dir = package_dir / f'plugins/{name}/static'
    settings.STATICFILES_DIRS += [str(static_dir)]

    templates_dir = package_dir / f'plugins/{name}/templates'
    settings.TEMPLATE_DIRS += [str(templates_dir)]

    print('REGISTERED PLUGIN SETTINGS', name)
|
1
archivebox/plugins/gallerydl/__init__.py
Normal file
1
archivebox/plugins/gallerydl/__init__.py
Normal file
|
@ -0,0 +1 @@
|
||||||
|
# This file is archivebox/plugins/gallerydl/__init__.py, so the package name
# must be gallerydl (the previous value was copied from the replaywebpage plugin).
__package__ = 'archivebox.plugins.gallerydl'
|
8
archivebox/plugins/gallerydl/admin.py
Normal file
8
archivebox/plugins/gallerydl/admin.py
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
from django.contrib import admin
|
||||||
|
from solo.admin import SingletonModelAdmin
|
||||||
|
|
||||||
|
from .models import GalleryDLDependency, GalleryDLExtractor
|
||||||
|
|
||||||
|
|
||||||
|
# Register both GalleryDL singleton config models with the same admin class
# (AdminSite.register accepts an iterable of models).
admin.site.register([GalleryDLDependency, GalleryDLExtractor], SingletonModelAdmin)
|
13
archivebox/plugins/gallerydl/apps.py
Normal file
13
archivebox/plugins/gallerydl/apps.py
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
from django.apps import AppConfig
|
||||||
|
|
||||||
|
|
||||||
|
class GalleryDLAppConfig(AppConfig):
    """Django app configuration for the GalleryDL plugin."""

    # Django requires AppConfig.label to be a valid Python identifier, so the
    # human-readable name goes in verbose_name instead.
    label = "gallerydl"
    verbose_name = "Gallery-DL"
    name = "plugin_gallerydl"

    default_auto_field = "django.db.models.BigAutoField"

    def ready(self):
        """Announce that the plugin finished loading."""
        # querying models is ok, but don't fetch rows from DB or perform stateful actions here

        print('√ Loaded GalleryDL Plugin')
|
50
archivebox/plugins/gallerydl/extractors.py
Normal file
50
archivebox/plugins/gallerydl/extractors.py
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
# browsertrix extractor

def save_browsertrix(link, out_dir, timeout, config):
    """Crawl ``link.url`` with a remote browsertrix-crawler and copy the WACZ locally.

    The crawl script is piped through ``nc`` to the ``browsertrix`` host; the
    resulting ``<crawl_id>.wacz`` is copied into ``out_dir / 'browsertrix'``.
    Any exception from the remote call or the copy propagates to the caller.
    """
    import shlex  # local import: this module has no import block

    browsertrix_dir = out_dir / 'browsertrix'
    browsertrix_dir.mkdir(exist_ok=True)

    crawl_id = link.timestamp

    browsertrix_crawler_cmd = [
        'crawl',
        '--url', link.url,
        f'--collection={crawl_id}',
        '--scopeType=page',
        '--generateWACZ',
        '--text=final-to-warc',
        '--timeLimit=60',
    ]

    # f-string so the crawl id and crawl command are actually sent; before,
    # the literal text "{crawl_id}" was sent and the command list was unused.
    remote_cmd = f"""
    rm /tmp/dump.rdb;
    rm -rf /crawls/collections;
    mkdir /crawls/collections;
    env CRAWL_ID={shlex.quote(str(crawl_id))} {shlex.join(browsertrix_crawler_cmd)}
    """

    local_cmd = ['nc', 'browsertrix', '2222']

    status = 'succeeded'
    # NOTE(review): TimedProgress, run, Path and copy_and_overwrite are not
    # imported in this module -- confirm where they should come from.
    timer = TimedProgress(timeout, prefix=' ')
    try:
        result = run(local_cmd, cwd=str(out_dir), input=remote_cmd, timeout=timeout)

        cmd_output = result.stdout.decode()

        # the suffix belongs inside the string; `f'...'.wacz` was an attribute lookup
        wacz_output_file = Path('/browsertrix/crawls') / crawl_id / f'{crawl_id}.wacz'

        copy_and_overwrite(wacz_output_file, browsertrix_dir / wacz_output_file.name)
    finally:
        # the bare `try:` had no handler (SyntaxError); always stop the timer
        timer.end()
    # TODO: build and return a result object; nothing is returned yet



TEMPLATE = """

"""

# rm /tmp/dump.rdb;
# rm -rf /crawls/collections;
# mkdir /crawls/collections;
# env CRAWL_ID=tec2342 crawl --url 'https://example.com' --scopeType page --generateWACZ --collection tec2342 --text final-to-warc --timeLimit 60
|
121
archivebox/plugins/gallerydl/models.py
Normal file
121
archivebox/plugins/gallerydl/models.py
Normal file
|
@ -0,0 +1,121 @@
|
||||||
|
from pathlib import Path

from django.db import models
from django.utils.functional import cached_property

from solo.models import SingletonModel

from archivebox.plugins.defaults.models import (
    ArchiveBoxDefaultDependency,
    ArchiveBoxDefaultExtractor,
    BashEnvironmentDependency,
    PipEnvironmentDependency,
    ConfigDict,
)
|
||||||
|
|
||||||
|
|
||||||
|
class GalleryDLDependency(ArchiveBoxDefaultDependency, SingletonModel):
    """Dependency configuration for the ``gallery-dl`` binary."""

    # NOTE(review): ArchiveBoxDefaultDependency is declared with
    # abstract = False, and Django does not allow redefining fields inherited
    # from a non-abstract model (ENABLED, BINARY below) -- confirm whether this
    # should inherit from the abstract ArchiveBoxBaseDependency instead.

    NAME = 'GALLERYDL'
    LABEL = "GalleryDL"
    REQUIRED = False

    PARENT_DEPENDENCIES = [
        BashEnvironmentDependency,
        PipEnvironmentDependency,
    ]

    BIN_DEPENDENCIES = ['gallery-dl']
    APT_DEPENDENCIES = []
    BREW_DEPENDENCIES = []
    # The base class reads PIP_DEPENDENCIES / NPM_DEPENDENCIES in
    # install_self(); the previous *_PACKAGES names were never consulted.
    PIP_DEPENDENCIES = ['gallery-dl']
    NPM_DEPENDENCIES = []

    DEFAULT_BINARY = 'gallery-dl'
    DEFAULT_START_CMD = None
    DEFAULT_ARGS = ''   # the base class declares this as str | None, not a list
    VERSION_CMD = '{BINARY} --version'

    ENABLED = models.BooleanField(default=True)
    BINARY = models.CharField(max_length=255, default='gallery-dl')

    WORKERS = models.IntegerField(default=1)   # integer default, not the string '1'
|
||||||
|
|
||||||
|
|
||||||
|
class GalleryDLExtractor(ArchiveBoxDefaultExtractor, SingletonModel):
    """Extractor configuration that archives a URL with ``gallery-dl``."""

    # NOTE(review): ArchiveBoxDefaultExtractor is declared with
    # abstract = False, and Django does not allow redefining fields inherited
    # from a non-abstract model (ENABLED, CMD, ARGS, TIMEOUT below) -- confirm
    # whether this should inherit from the abstract ArchiveBoxBaseExtractor.

    NAME = 'GALLERYDL'
    LABEL = 'gallery-dl'

    # Keep the model *class* here: calling get_solo() in the class body hits the
    # database at import time. The inherited `dependency` property fetches the
    # singleton row lazily.
    DEPENDENCY = GalleryDLDependency

    # https://github.com/mikf/gallery-dl
    DEFAULT_CMD = [
        '{DEPENDENCY.BINARY}',
        '{ARGS}',   # missing comma previously fused this with '{url}'
        '{url}',
    ]
    # extract() passes --timeout/--cookies/--user-agent/--verify explicitly, so
    # ARGS only carries extra user-supplied arguments. The previous value
    # referenced `self` and `config`, which do not exist at class scope.
    DEFAULT_ARGS = ''

    ENABLED = models.BooleanField(default=True)

    CMD = models.CharField(max_length=255, default=DEFAULT_CMD)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS)   # models.CSVField does not exist

    TIMEOUT = models.CharField(max_length=255, default='{TIMEOUT}')
    USER_AGENT = models.CharField(max_length=255, default='{USER_AGENT}')
    COOKIES_TXT = models.CharField(max_length=255, default='{COOKIES_TXT}')
    CHECK_SSL_VALIDITY = models.CharField(max_length=255, default='{CHECK_SSL_VALIDITY}')   # CharField requires max_length

    # @task
    # @requires_config('HOSTNAME', 'TIMEOUT', 'USER_AGENT', 'CHECK_SSL_VALIDITY')
    def extract(self, url: str, out_dir: Path, config: ConfigDict):
        """Run gallery-dl for ``url`` and return an ``ArchiveResult``.

        Returns ``None`` without doing anything when the extractor is disabled.
        """
        import shlex  # local import: used only to split the extra ARGS string

        if not self.ENABLED:
            return

        # create_extractor_directory() does not exist; use the inherited out_dir()
        extractor_dir = self.out_dir(url=url, snapshot_dir=out_dir, config=config)
        extractor_dir.mkdir(exist_ok=True)

        # NOTE(review): the dependency's run() already prepends the binary
        # path, so passing self.CMD as the first argument looks redundant --
        # confirm the intended command layout.
        cmd = [
            self.CMD,
            url,
            '--timeout', self.TIMEOUT.format(**config),
            '--cookies', self.COOKIES_TXT.format(**config),
            '--user-agent', self.USER_AGENT.format(**config),   # was COOKIES_TXT
            '--verify', self.CHECK_SSL_VALIDITY.format(**config),
            *shlex.split(self.ARGS.format(**config)),            # split_args was undefined
        ]

        status, stdout, stderr, output_path = 'failed', '', '', None
        timer = None   # stays None if run() itself raises before returning
        try:
            # GALLERYDL_TIMEOUT is not an attribute of this model; use TIMEOUT
            proc, timer, errors = self.dependency.run(cmd, pwd=extractor_dir, timeout=self.TIMEOUT)
            if errors is not None:
                # surface the real failure instead of tripping over proc=None
                raise errors
            stdout, stderr = proc.stdout, proc.stderr

            if 'ERROR: Unsupported URL' in stderr:
                hints = ('gallery-dl doesnt support this type of url yet',)
                raise ArchiveError('Failed to save gallerydl', hints)

            if proc.returncode == 0 and 'finished' in stdout:
                output_path = extractor_dir / 'index.html'
                status = 'succeeded'
        except Exception as err:
            # str += Exception raises TypeError; append the message text
            stderr += str(err)

        # NOTE(review): get_dir_size, ArchiveResult and ArchiveError are not
        # imported in this module -- confirm where they should come from.
        num_bytes, num_dirs, num_files = get_dir_size(extractor_dir)

        return ArchiveResult(
            cmd=cmd,
            pwd=str(out_dir),
            cmd_version=self.dependency.bin_version,
            cmd_path=self.dependency.bin_path,
            cmd_hostname=config.HOSTNAME,

            output_path=output_path,
            stdout=stdout,
            stderr=stderr,
            status=status,

            num_bytes=num_bytes,
            num_files=num_files,
            num_dirs=num_dirs,
            **(timer.stats if timer is not None else {}),
        )
|
59
archivebox/plugins/gallerydl/plugin.yaml
Normal file
59
archivebox/plugins/gallerydl/plugin.yaml
Normal file
|
@ -0,0 +1,59 @@
|
||||||
|
# Declarative description of the GalleryDL plugin: one dependency and one extractor.
dependencies:
    GalleryDLDependency:
        ID: gallerydl
        LABEL: GalleryDL
        REQUIRED: false

        PARENT_DEPENDENCIES:
            - BashEnvironmentDependency
            - PipEnvironmentDependency

        PIP_DEPENDENCIES:
            - gallery-dl

        USER_CONFIG:
            ENABLED: models.BooleanField(max_length=255, default={DEFAULT_CONFIG.ENABLED})
            BINARY: models.CharField(max_length=255, default={DEFAULT_CONFIG.BINARY})

        DEFAULT_CONFIG:
            ENABLED: true
            BINARY: 'gallery-dl'

        CONFIG_ALIASES:
            - SAVE_GALLERYDL: ENABLED
            - USE_GALLERYDL: ENABLED
            - GALLERYDL_ENABLED: ENABLED
            - GALLERYDL_BINARY: BINARY

        TASKS:
            # plugins.GalleryDLDependency
            run_dependency: plugins.gallerydl.models.GalleryDLDependency.run_dependency


extractors:
    GalleryDLExtractor:
        ID: GALLERYDL
        LABEL: GalleryDL
        ENABLED: true

        DEPENDENCY: GalleryDLDependency

        CONFIG:
            ENABLED: models.BooleanField(default={DEFAULT_CONFIG.ENABLED})
            CMD: models.CharField(max_length=255, default={DEFAULT_CONFIG.CMD})
            ARGS: models.CharField(max_length=255, default={DEFAULT_CONFIG.ARGS})
            USER_AGENT: models.CharField(max_length=255, default={DEFAULT_CONFIG.USER_AGENT})
            CHECK_SSL_VALIDITY: models.CharField(max_length=255, default={DEFAULT_CONFIG.CHECK_SSL_VALIDITY})

        DEFAULT_CONFIG:
            ENABLED: true
            CMD: gallery-dl {args} {url}
            ARGS: --user-agent={USER_AGENT} --check-ssl={CHECK_SSL_VALIDITY}
            # quoted: a value that starts with "{" is otherwise parsed as a YAML flow mapping
            CHECK_SSL_VALIDITY: '{CHECK_SSL_VALIDITY}'
            USER_AGENT: '{USER_AGENT}'


        TASKS:
            CREATE_OUT_DIR: plugins.gallerydl.tasks.create_out_dir
            SHOULD_EXTRACT: plugins.gallerydl.tasks.should_extract
            EXTRACT: plugins.gallerydl.tasks.extract
|
124
archivebox/plugins/gallerydl/static/sw.js
Normal file
124
archivebox/plugins/gallerydl/static/sw.js
Normal file
File diff suppressed because one or more lines are too long
1
archivebox/plugins/gallerydl/static/test.txt
Normal file
1
archivebox/plugins/gallerydl/static/test.txt
Normal file
|
@ -0,0 +1 @@
|
||||||
|
test content this should be visible
|
BIN
archivebox/plugins/gallerydl/static/test.wacz
Normal file
BIN
archivebox/plugins/gallerydl/static/test.wacz
Normal file
Binary file not shown.
3392
archivebox/plugins/gallerydl/static/ui.js
Normal file
3392
archivebox/plugins/gallerydl/static/ui.js
Normal file
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,40 @@
|
||||||
|
{% load tz core_tags static %}
{# Embeds the <replay-web-page> viewer for a snapshot; source and url are still hard-coded test values. #}

<!DOCTYPE html>
<html lang="en">
    <head>
        <title>{{title}}</title>
        {# charset and viewport are separate declarations, so each gets its own meta tag #}
        <meta charset="utf-8">
        <meta name="viewport" content="width=device-width, initial-scale=1">

        {# a stray closing style tag with no matching opener was removed here #}
        <style>
            html, body {
                width: 100%;
                height: 100%;
                background-color: #ddd;
            }
        </style>
    </head>
    <body>
        ReplayWeb.page for: {{snapshot.url}} ({{timestamp}}) /{{warc_filename}}

        {{snapshot}}

        <script>
            // https://cdn.jsdelivr.net/npm/replaywebpage@1.8.14/sw.min.js
            // https://cdn.jsdelivr.net/npm/replaywebpage@1.8.14/ui.min.js
        </script>

        <style>
        </style>
        <script src="/static/ui.js"></script>

        <replay-web-page
            style="height: 600px"
            embed="replay"
            replayBase="/static/"
            source="/static/test.wacz"
            url="https://example.com/">
        </replay-web-page>
    </body>
</html>
|
12
archivebox/plugins/gallerydl/urls.py
Normal file
12
archivebox/plugins/gallerydl/urls.py
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
from django.urls import path
|
||||||
|
|
||||||
|
from .views import GalleryDLIconView, GalleryDLEmbedView, GalleryDLOutputView, GalleryDLDependencyView, GalleryDLExtractorView
|
||||||
|
|
||||||
|
# URL routes for the GalleryDL plugin views.
# NOTE(review): every route starts with '/', which Django's URL system check
# warns about (urls.W002) -- confirm how these patterns are included.
# NOTE(review): GalleryDLDependencyView and GalleryDLExtractorView are imported
# above, but the views.py added in this commit does not define them.
urlpatterns = [
    # `GalleryDLIconView(.as_view)` was a syntax error
    path('/plugins/gallerydl/icon/<path:path>', GalleryDLIconView.as_view(), name='gallerydl_icon'),
    path('/plugins/gallerydl/embed/<path:path>', GalleryDLEmbedView.as_view(), name='gallerydl_embed'),
    path('/plugins/gallerydl/output/<path:path>', GalleryDLOutputView.as_view(), name='gallerydl_output'),

    path('/plugins/gallerydl/dependency/', GalleryDLDependencyView.as_view(), name='gallerydl_dependency'),
    path('/plugins/gallerydl/extractor/', GalleryDLExtractorView.as_view(), name='gallerydl_extractor'),
]
|
78
archivebox/plugins/gallerydl/views.py
Normal file
78
archivebox/plugins/gallerydl/views.py
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.views import View
|
||||||
|
from django.shortcuts import render
|
||||||
|
from django.db.models import Q
|
||||||
|
|
||||||
|
from core.models import Snapshot
|
||||||
|
|
||||||
|
# from archivebox.config import PUBLIC_SNAPSHOTS
|
||||||
|
PUBLIC_SNAPSHOTS = True
|
||||||
|
|
||||||
|
|
||||||
|
class GalleryDLIconView(View):
    """Render the GalleryDL icon template for a snapshot path."""

    template_name = 'plugin_gallerydl__icon.html'

    # render static html index from filesystem archive/<timestamp>/index.html

    def get_context_data(self, **kwargs):
        """Return the template context (currently empty)."""
        return {
            # **super().get_context_data(**kwargs),
            # 'VERSION': VERSION,
            # 'COMMIT_HASH': COMMIT_HASH,
            # 'FOOTER_INFO': FOOTER_INFO,
        }

    def get(self, request, path):
        """Redirect anonymous users to login when snapshots are private, else render."""
        # local import: the module's import block only brings in `render`
        from django.shortcuts import redirect

        if not request.user.is_authenticated and not PUBLIC_SNAPSHOTS:
            return redirect(f'/admin/login/?next={request.path}')

        # ...
        # `context` was never defined before being passed to render()
        context = self.get_context_data()
        return render(template_name=self.template_name, request=self.request, context=context)
|
||||||
|
|
||||||
|
|
||||||
|
class GalleryDLEmbedView(View):
    """Render the GalleryDL embed template for a snapshot path."""

    template_name = 'plugin_gallerydl__embed.html'

    # render static html index from filesystem archive/<timestamp>/index.html

    def get_context_data(self, **kwargs):
        """Return the template context (currently empty)."""
        return {
            # **super().get_context_data(**kwargs),
            # 'VERSION': VERSION,
            # 'COMMIT_HASH': COMMIT_HASH,
            # 'FOOTER_INFO': FOOTER_INFO,
        }

    def get(self, request, path):
        """Redirect anonymous users to login when snapshots are private, else render."""
        # local import: the module's import block only brings in `render`
        from django.shortcuts import redirect

        if not request.user.is_authenticated and not PUBLIC_SNAPSHOTS:
            return redirect(f'/admin/login/?next={request.path}')

        # ...
        # `context` was never defined before being passed to render()
        context = self.get_context_data()
        return render(template_name=self.template_name, request=self.request, context=context)
|
||||||
|
|
||||||
|
|
||||||
|
class GalleryDLOutputView(View):
    """Render the GalleryDL output template for a snapshot path."""

    template_name = 'plugin_gallerydl__output.html'

    # render static html index from filesystem archive/<timestamp>/index.html

    def get_context_data(self, **kwargs):
        """Return the template context (currently empty)."""
        return {
            # **super().get_context_data(**kwargs),
            # 'VERSION': VERSION,
            # 'COMMIT_HASH': COMMIT_HASH,
            # 'FOOTER_INFO': FOOTER_INFO,
        }

    def get(self, request, path):
        """Redirect anonymous users to login when snapshots are private, else render."""
        # local import: the module's import block only brings in `render`
        from django.shortcuts import redirect

        if not request.user.is_authenticated and not PUBLIC_SNAPSHOTS:
            return redirect(f'/admin/login/?next={request.path}')

        # ...
        # `context` was never defined before being passed to render()
        context = self.get_context_data()
        return render(template_name=self.template_name, request=self.request, context=context)
|
1
archivebox/plugins/replaywebpage/__init__.py
Normal file
1
archivebox/plugins/replaywebpage/__init__.py
Normal file
|
@ -0,0 +1 @@
|
||||||
|
# Set the package name explicitly; Python uses __package__ to resolve relative imports.
__package__ = 'archivebox.plugins.replaywebpage'
|
8
archivebox/plugins/replaywebpage/apps.py
Normal file
8
archivebox/plugins/replaywebpage/apps.py
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
from django.apps import AppConfig
|
||||||
|
|
||||||
|
|
||||||
|
class ReplayWebPageConfig(AppConfig):
    """Django application config for the ReplayWeb.Page plugin."""

    # An app label is used as an identifier in "app_label.ModelName"
    # references, so it cannot contain a dot. The human-readable product
    # name lives in `verbose_name` instead.
    label = "replaywebpage"
    verbose_name = "ReplayWeb.Page"

    # NOTE(review): Django expects `name` to be the importable dotted path of
    # the app package (this package declares itself as
    # 'archivebox.plugins.replaywebpage') -- confirm "plugin_replaywebpage"
    # actually resolves on sys.path before relying on this config.
    name = "plugin_replaywebpage"

    default_auto_field = "django.db.models.BigAutoField"
|
50
archivebox/plugins/replaywebpage/extractors.py
Normal file
50
archivebox/plugins/replaywebpage/extractors.py
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
# browsertrix extractor
|
||||||
|
|
||||||
|
def save_browsertrix(link, out_dir, timeout, config):
    """Crawl ``link.url`` via a remote browsertrix-crawler and store the WACZ.

    A shell script is piped over ``nc browsertrix 2222``; it resets the
    crawler's collections directory and runs a single-page crawl whose
    collection id is ``link.timestamp``. The resulting ``<crawl_id>.wacz`` is
    then copied from ``/browsertrix/crawls/<crawl_id>/`` into
    ``out_dir / 'browsertrix'``.

    Args:
        link: object exposing ``url`` and ``timestamp``.
        out_dir: path-like (supports ``/``) output directory of the snapshot.
        timeout: seconds allowed for the remote command.
        config: currently unused.

    Raises:
        Whatever ``run`` / ``copy_and_overwrite`` raise; nothing is swallowed.

    NOTE(review): ``TimedProgress``, ``run``, ``Path`` and
    ``copy_and_overwrite`` are not imported anywhere in this module yet.
    TODO: return a result object once the extractor interface is settled.
    """
    import shlex  # local import: this module has no import block yet

    browsertrix_dir = out_dir / 'browsertrix'
    browsertrix_dir.mkdir(exist_ok=True)

    crawl_id = link.timestamp

    browsertrix_crawler_cmd = [
        'crawl',
        '--url', link.url,
        f'--collection={crawl_id}',
        '--scopeType=page',
        '--generateWACZ',
        '--text=final-to-warc',
        '--timeLimit=60',
    ]

    # The URL is untrusted input and ends up inside a remote shell script, so
    # every argument is shell-quoted before being joined.
    crawl_cmd = ' '.join(shlex.quote(str(arg)) for arg in browsertrix_crawler_cmd)

    # Previously this was a plain (non-f) string, so "{crawl_id}" was sent
    # literally and the crawl command itself was never appended.
    remote_cmd = (
        'rm /tmp/dump.rdb;\n'
        'rm -rf /crawls/collections;\n'
        'mkdir /crawls/collections;\n'
        f'env CRAWL_ID={shlex.quote(str(crawl_id))} {crawl_cmd}\n'
    )

    local_cmd = ['nc', 'browsertrix', '2222']

    timer = TimedProgress(timeout, prefix=' ')
    try:
        # stdout is decoded below, so the pipes are presumably in bytes mode;
        # the script is therefore encoded before being written to stdin.
        result = run(local_cmd, cwd=str(out_dir), input=remote_cmd.encode(), timeout=timeout)

        cmd_output = result.stdout.decode()

        # was: f'{crawl_id}'.wacz  (attribute lookup on a str -> AttributeError)
        wacz_output_file = Path('/browsertrix/crawls') / crawl_id / f'{crawl_id}.wacz'

        copy_and_overwrite(wacz_output_file, browsertrix_dir / wacz_output_file.name)
    finally:
        # NOTE(review): assumes TimedProgress exposes end() to stop the
        # progress display -- confirm against its definition.
        timer.end()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
TEMPLATE = """
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
# rm /tmp/dump.rdb;
|
||||||
|
# rm -rf /crawls/collections;
|
||||||
|
# mkdir /crawls/collections;
|
||||||
|
# env CRAWL_ID=tec2342 crawl --url 'https://example.com' --scopeType page --generateWACZ --collection tec2342 --text final-to-warc --timeLimit 60
|
12
archivebox/plugins/replaywebpage/models.py
Normal file
12
archivebox/plugins/replaywebpage/models.py
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
# from solo.models import SingletonModel
|
||||||
|
|
||||||
|
|
||||||
|
# class ReplayWebPageConfiguration(SingletonModel):
|
||||||
|
# site_name = models.CharField(max_length=255, default='Site Name')
|
||||||
|
# maintenance_mode = models.BooleanField(default=False)
|
||||||
|
|
||||||
|
# def __str__(self):
|
||||||
|
# return "Site Configuration"
|
||||||
|
|
||||||
|
# class Meta:
|
||||||
|
# verbose_name = "Site Configuration"
|
124
archivebox/plugins/replaywebpage/static/sw.js
Normal file
124
archivebox/plugins/replaywebpage/static/sw.js
Normal file
File diff suppressed because one or more lines are too long
1
archivebox/plugins/replaywebpage/static/test.txt
Normal file
1
archivebox/plugins/replaywebpage/static/test.txt
Normal file
|
@ -0,0 +1 @@
|
||||||
|
test content this should be visible
|
BIN
archivebox/plugins/replaywebpage/static/test.wacz
Normal file
BIN
archivebox/plugins/replaywebpage/static/test.wacz
Normal file
Binary file not shown.
3392
archivebox/plugins/replaywebpage/static/ui.js
Normal file
3392
archivebox/plugins/replaywebpage/static/ui.js
Normal file
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,40 @@
|
||||||
|
{% load tz core_tags static %}
{# Viewer page embedding the <replay-web-page> component for one snapshot. #}
{# Context used: title, snapshot, timestamp, warc_filename. #}

<!DOCTYPE html>
<html lang="en">
    <head>
        <title>{{title}}</title>
        <meta charset="utf-8">
        <meta name="viewport" content="width=device-width, initial-scale=1">

        <style>
            html, body {
                width: 100%;
                height: 100%;
                background-color: #ddd;
            }
        </style>
    </head>
    <body>
        ReplayWeb.page for: {{snapshot.url}} ({{timestamp}}) /{{warc_filename}}

        {{snapshot}}

        <script>
            // https://cdn.jsdelivr.net/npm/replaywebpage@1.8.14/sw.min.js
            // https://cdn.jsdelivr.net/npm/replaywebpage@1.8.14/ui.min.js
        </script>

        <style>
        </style>
        <script src="/static/ui.js"></script>

        <replay-web-page
            style="height: 600px"
            embed="replay"
            replayBase="/static/"
            source="/static/test.wacz"
            url="https://example.com/">
        </replay-web-page>
    </body>
</html>
|
7
archivebox/plugins/replaywebpage/urls.py
Normal file
7
archivebox/plugins/replaywebpage/urls.py
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
from django.urls import path
|
||||||
|
|
||||||
|
from .views import ReplayWebPageViewer
|
||||||
|
|
||||||
|
urlpatterns = [
|
||||||
|
path('<path:path>', ReplayWebPageViewer.as_view(), name='plugin_replaywebpage__viewer'),
|
||||||
|
]
|
47
archivebox/plugins/replaywebpage/views.py
Normal file
47
archivebox/plugins/replaywebpage/views.py
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.views import View
|
||||||
|
from django.shortcuts import render
|
||||||
|
from django.db.models import Q
|
||||||
|
|
||||||
|
from core.models import Snapshot
|
||||||
|
|
||||||
|
# from archivebox.config import PUBLIC_SNAPSHOTS
|
||||||
|
PUBLIC_SNAPSHOTS = True
|
||||||
|
|
||||||
|
|
||||||
|
class ReplayWebPageViewer(View):
    """Render the ReplayWeb.Page viewer for ``<timestamp>/<warc_filename>``.

    Anonymous users are sent to the admin login page unless
    ``PUBLIC_SNAPSHOTS`` is truthy.
    """

    template_name = 'plugin_replaywebpage__viewer.html'

    # render static html index from filesystem archive/<timestamp>/index.html

    def get_context_data(self, **kwargs):
        """Return the base template context (currently an empty dict)."""
        return {
            # **super().get_context_data(**kwargs),
            # 'VERSION': VERSION,
            # 'COMMIT_HASH': COMMIT_HASH,
            # 'FOOTER_INFO': FOOTER_INFO,
        }

    def get(self, request, path):
        """Look up the snapshot named by ``path`` and render the viewer.

        Args:
            request: the incoming HttpRequest.
            path: ``"<timestamp>"`` or ``"<timestamp>/<warc_filename>"``; the
                first segment may also be a prefix of a snapshot id.

        Raises:
            Http404: when no snapshot, or more than one, matches the prefix.
        """
        # Local imports: neither name is imported at the top of this module.
        from django.http import Http404
        from django.shortcuts import redirect

        if not request.user.is_authenticated and not PUBLIC_SNAPSHOTS:
            return redirect(f'/admin/login/?next={request.path}')

        # partition() yields an empty filename when there is no "/", which is
        # what the former try/except around split() produced.
        timestamp, _, warc_filename = path.partition('/')

        try:
            snapshot = Snapshot.objects.get(Q(timestamp=timestamp) | Q(id__startswith=timestamp))
        except (Snapshot.DoesNotExist, Snapshot.MultipleObjectsReturned) as err:
            # A bad or ambiguous URL is a client error, not a server crash.
            raise Http404(f'No unique snapshot matches "{timestamp}"') from err

        context = self.get_context_data()
        context.update({
            "snapshot": snapshot,
            "timestamp": timestamp,
            "warc_filename": warc_filename,
        })
        return render(template_name=self.template_name, request=request, context=context)
|
||||||
|
|
3
archivebox/plugins/system/__init__.py
Normal file
3
archivebox/plugins/system/__init__.py
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
__package__ = 'archivebox.plugins.system'
|
||||||
|
|
||||||
|
default_app_config = 'plugins.system.apps.SystemPluginAppConfig'
|
49
archivebox/plugins/system/admin.py
Normal file
49
archivebox/plugins/system/admin.py
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
from django.contrib import admin
|
||||||
|
from solo.admin import SingletonModelAdmin
|
||||||
|
|
||||||
|
from plugins.defaults.admin import DependencyAdmin, ExtractorAdmin
|
||||||
|
|
||||||
|
from .models import (
|
||||||
|
BashEnvironmentDependency,
|
||||||
|
PythonEnvironmentDependency,
|
||||||
|
NodeJSEnvironmentDependency,
|
||||||
|
|
||||||
|
AptEnvironmentDependency,
|
||||||
|
BrewEnvironmentDependency,
|
||||||
|
PipEnvironmentDependency,
|
||||||
|
NPMEnvironmentDependency,
|
||||||
|
|
||||||
|
SQLiteDependency,
|
||||||
|
DjangoDependency,
|
||||||
|
ArchiveBoxDependency,
|
||||||
|
|
||||||
|
# ArchiveBoxDefaultExtractor,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
print('DefaultsPluginConfig.admin')
|
||||||
|
|
||||||
|
class MultiDependencyAdmin(admin.ModelAdmin):
    """Admin for dependency models that are listed as multiple rows."""

    # Columns shown in the changelist table.
    list_display = (
        'id',
        'NAME',
        'ENABLED',
        'BINARY',
        'ARGS',
        'bin_path',
        'bin_version',
        'is_valid',
        'is_enabled',
    )

    # Reuse the read-only field set declared on DependencyAdmin.
    readonly_fields = DependencyAdmin.readonly_fields
|
||||||
|
|
||||||
|
class MultiExtractorAdmin(admin.ModelAdmin):
    """Admin for extractor models that are listed as multiple rows."""

    # Columns shown in the changelist table.
    list_display = (
        'id',
        'NAME',
        'CMD',
        'ARGS',
        'is_valid',
        'is_enabled',
    )

    # NOTE(review): this borrows DependencyAdmin's read-only fields although
    # ExtractorAdmin is imported too -- confirm that is intentional.
    readonly_fields = DependencyAdmin.readonly_fields
|
||||||
|
|
||||||
|
|
||||||
|
# admin.site.register(BashEnvironmentDependency, DependencyAdmin)
|
||||||
|
admin.site.register(BashEnvironmentDependency, MultiDependencyAdmin)
|
||||||
|
admin.site.register(PythonEnvironmentDependency, DependencyAdmin)
|
||||||
|
admin.site.register(NodeJSEnvironmentDependency, DependencyAdmin)
|
||||||
|
|
||||||
|
admin.site.register(AptEnvironmentDependency, DependencyAdmin)
|
||||||
|
admin.site.register(BrewEnvironmentDependency, DependencyAdmin)
|
||||||
|
admin.site.register(PipEnvironmentDependency, DependencyAdmin)
|
||||||
|
admin.site.register(NPMEnvironmentDependency, DependencyAdmin)
|
||||||
|
|
||||||
|
admin.site.register(SQLiteDependency, DependencyAdmin)
|
||||||
|
admin.site.register(DjangoDependency, DependencyAdmin)
|
||||||
|
admin.site.register(ArchiveBoxDependency, DependencyAdmin)
|
||||||
|
|
||||||
|
# admin.site.register(ArchiveBoxDefaultExtractor, ExtractorAdmin)
|
21
archivebox/plugins/system/apps.py
Normal file
21
archivebox/plugins/system/apps.py
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
__package__ = 'archivebox.plugins.system'
|
||||||
|
|
||||||
|
|
||||||
|
from django.apps import AppConfig
|
||||||
|
|
||||||
|
|
||||||
|
class SystemPluginAppConfig(AppConfig):
    """App config for the host-system plugin; registers its settings on startup."""

    name = 'plugins.system'
    verbose_name = 'Host System Configuration'

    default_auto_field = 'django.db.models.AutoField'

    def ready(self):
        """Print a startup trace line, then register this plugin's settings."""
        print('plugins.system.apps.SystemPluginConfig.ready')

        # Imported here rather than at module import time, as in the original.
        from django.conf import settings
        from plugins.defaults.settings import register_plugin_settings

        register_plugin_settings(settings, name=self.name)
|
||||||
|
|
144
archivebox/plugins/system/migrations/0001_initial.py
Normal file
144
archivebox/plugins/system/migrations/0001_initial.py
Normal file
|
@ -0,0 +1,144 @@
|
||||||
|
# Generated by Django 3.1.14 on 2024-01-24 08:56
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
initial = True
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.CreateModel(
|
||||||
|
name='AptEnvironmentDependency',
|
||||||
|
fields=[
|
||||||
|
('id', models.AutoField(primary_key=True, serialize=False)),
|
||||||
|
('ENABLED', models.BooleanField(default=True)),
|
||||||
|
('BINARY', models.CharField(default='apt-get', max_length=255)),
|
||||||
|
('ARGS', models.CharField(default='-qq', max_length=255)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
'verbose_name': 'Package Manager: apt',
|
||||||
|
'abstract': False,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
migrations.CreateModel(
|
||||||
|
name='ArchiveBoxDependency',
|
||||||
|
fields=[
|
||||||
|
('id', models.AutoField(primary_key=True, serialize=False)),
|
||||||
|
('ENABLED', models.BooleanField(default=True, editable=False)),
|
||||||
|
('BINARY', models.CharField(default='archivebox', editable=False, max_length=255)),
|
||||||
|
('ARGS', models.CharField(default=[], editable=False, max_length=255)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
'verbose_name': 'Internal Dependency: ArchiveBox Package',
|
||||||
|
'abstract': False,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
migrations.CreateModel(
|
||||||
|
name='BashEnvironmentDependency',
|
||||||
|
fields=[
|
||||||
|
('id', models.AutoField(primary_key=True, serialize=False)),
|
||||||
|
('ENABLED', models.BooleanField(default=True, editable=False)),
|
||||||
|
('BINARY', models.CharField(default='bash', max_length=255)),
|
||||||
|
('ARGS', models.CharField(default='-c', max_length=255)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
'verbose_name': 'Shell Environment: bash',
|
||||||
|
'abstract': False,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
migrations.CreateModel(
|
||||||
|
name='BrewEnvironmentDependency',
|
||||||
|
fields=[
|
||||||
|
('id', models.AutoField(primary_key=True, serialize=False)),
|
||||||
|
('ENABLED', models.BooleanField(default=True)),
|
||||||
|
('BINARY', models.CharField(default='brew', max_length=255)),
|
||||||
|
('ARGS', models.CharField(default='', max_length=255)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
'verbose_name': 'Package Manager: brew',
|
||||||
|
'abstract': False,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
migrations.CreateModel(
|
||||||
|
name='DjangoDependency',
|
||||||
|
fields=[
|
||||||
|
('id', models.AutoField(primary_key=True, serialize=False)),
|
||||||
|
('ENABLED', models.BooleanField(default=True, editable=False)),
|
||||||
|
('BINARY', models.CharField(default='django-admin.py', editable=False, max_length=255)),
|
||||||
|
('ARGS', models.CharField(default=[], editable=False, max_length=255)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
'verbose_name': 'Internal Dependency: Django Package',
|
||||||
|
'abstract': False,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
migrations.CreateModel(
|
||||||
|
name='NodeJSEnvironmentDependency',
|
||||||
|
fields=[
|
||||||
|
('id', models.AutoField(primary_key=True, serialize=False)),
|
||||||
|
('ENABLED', models.BooleanField(default=True)),
|
||||||
|
('BINARY', models.CharField(default='node', max_length=255)),
|
||||||
|
('ARGS', models.CharField(default='-c', max_length=255)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
'verbose_name': 'Shell Environment: NodeJS',
|
||||||
|
'abstract': False,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
migrations.CreateModel(
|
||||||
|
name='NPMEnvironmentDependency',
|
||||||
|
fields=[
|
||||||
|
('id', models.AutoField(primary_key=True, serialize=False)),
|
||||||
|
('ENABLED', models.BooleanField(default=True)),
|
||||||
|
('BINARY', models.CharField(default='node', max_length=255)),
|
||||||
|
('ARGS', models.CharField(default='', max_length=255)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
'verbose_name': 'Package Manager: npm',
|
||||||
|
'abstract': False,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
migrations.CreateModel(
|
||||||
|
name='PipEnvironmentDependency',
|
||||||
|
fields=[
|
||||||
|
('id', models.AutoField(primary_key=True, serialize=False)),
|
||||||
|
('ENABLED', models.BooleanField(default=True)),
|
||||||
|
('BINARY', models.CharField(default='pip3', max_length=255)),
|
||||||
|
('ARGS', models.CharField(default='', max_length=255)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
'verbose_name': 'Package Manager: pip',
|
||||||
|
'abstract': False,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
migrations.CreateModel(
|
||||||
|
name='PythonEnvironmentDependency',
|
||||||
|
fields=[
|
||||||
|
('id', models.AutoField(primary_key=True, serialize=False)),
|
||||||
|
('ENABLED', models.BooleanField(default=True, editable=False)),
|
||||||
|
('BINARY', models.CharField(default='python3', max_length=255)),
|
||||||
|
('ARGS', models.CharField(default='-c', max_length=255)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
'verbose_name': 'Shell Environment: Python3',
|
||||||
|
'abstract': False,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
migrations.CreateModel(
|
||||||
|
name='SQLiteDependency',
|
||||||
|
fields=[
|
||||||
|
('id', models.AutoField(primary_key=True, serialize=False)),
|
||||||
|
('ENABLED', models.BooleanField(default=True, editable=False)),
|
||||||
|
('BINARY', models.CharField(default='sqlite3', editable=False, max_length=255)),
|
||||||
|
('ARGS', models.CharField(default=[], editable=False, max_length=255)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
'verbose_name': 'Internal Dependency: SQLite3 Package',
|
||||||
|
'abstract': False,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
]
|
|
@ -0,0 +1,33 @@
|
||||||
|
# Generated by Django 3.1.14 on 2024-01-24 09:43
|
||||||
|
|
||||||
|
from django.db import migrations
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('system', '0001_initial'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterModelOptions(
|
||||||
|
name='archiveboxdependency',
|
||||||
|
options={'verbose_name': 'Internal Dependency: archivebox'},
|
||||||
|
),
|
||||||
|
migrations.AlterModelOptions(
|
||||||
|
name='djangodependency',
|
||||||
|
options={'verbose_name': 'Internal Dependency: django'},
|
||||||
|
),
|
||||||
|
migrations.AlterModelOptions(
|
||||||
|
name='nodejsenvironmentdependency',
|
||||||
|
options={'verbose_name': 'Shell Environment: node'},
|
||||||
|
),
|
||||||
|
migrations.AlterModelOptions(
|
||||||
|
name='pythonenvironmentdependency',
|
||||||
|
options={'verbose_name': 'Shell Environment: python3'},
|
||||||
|
),
|
||||||
|
migrations.AlterModelOptions(
|
||||||
|
name='sqlitedependency',
|
||||||
|
options={'verbose_name': 'Internal Dependency: sqlite3'},
|
||||||
|
),
|
||||||
|
]
|
|
@ -0,0 +1,22 @@
|
||||||
|
# Generated by Django 3.1.14 on 2024-01-24 09:56
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('system', '0002_auto_20240124_0943'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterModelOptions(
|
||||||
|
name='bashenvironmentdependency',
|
||||||
|
options={'verbose_name': 'Shell Environment: bash', 'verbose_name_plural': 'Shell Environments: bash'},
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='bashenvironmentdependency',
|
||||||
|
name='VERSION_CMD',
|
||||||
|
field=models.CharField(default='{BINARY} --version', max_length=255),
|
||||||
|
),
|
||||||
|
]
|
0
archivebox/plugins/system/migrations/__init__.py
Normal file
0
archivebox/plugins/system/migrations/__init__.py
Normal file
448
archivebox/plugins/system/models.py
Normal file
448
archivebox/plugins/system/models.py
Normal file
|
@ -0,0 +1,448 @@
|
||||||
|
# __package__ = 'archivebox.plugins.system'
|
||||||
|
|
||||||
|
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import sys
|
||||||
|
import inspect
|
||||||
|
import django
|
||||||
|
from sqlite3 import dbapi2 as sqlite3
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Dict, Any
|
||||||
|
|
||||||
|
from django.db import models
|
||||||
|
from django.utils.functional import cached_property
|
||||||
|
|
||||||
|
from solo.models import SingletonModel
|
||||||
|
|
||||||
|
from config import bin_path, bin_version, VERSION
|
||||||
|
|
||||||
|
from plugins.defaults.models import ArchiveBoxBaseDependency
|
||||||
|
|
||||||
|
ConfigDict = Dict[str, Any]
|
||||||
|
|
||||||
|
|
||||||
|
class BashEnvironmentDependency(ArchiveBoxBaseDependency):
    """Dependency record describing the ``bash`` shell on the host."""

    singleton_instance_id = 1

    id = models.AutoField(primary_key=True)

    # -- identity --
    NAME = "BASH"
    LABEL = 'Bash'
    REQUIRED = True

    # -- what this dependency itself needs --
    PARENT_DEPENDENCIES = []

    BIN_DEPENDENCIES: List[str] = ["bash"]
    APT_DEPENDENCIES: List[str] = []
    BREW_DEPENDENCIES: List[str] = []
    PIP_DEPENDENCIES: List[str] = []
    NPM_DEPENDENCIES: List[str] = []

    # -- defaults feeding the model fields below --
    DEFAULT_BINARY = "bash"
    DEFAULT_START_CMD = None
    DEFAULT_STOP_CMD = None
    DEFAULT_PID_FILE = None
    DEFAULT_ARGS = "-c"

    # -- persisted, user-editable configuration --
    ENABLED = models.BooleanField(default=True, editable=not REQUIRED)
    BINARY = models.CharField(max_length=255, default=DEFAULT_BINARY)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS)

    VERSION_CMD = models.CharField(max_length=255, default="{BINARY} --version")

    # START_CMD = models.CharField(max_length=255, default=DEFAULT_START_CMD)
    # WORKERS = models.IntegerField(default=1)

    class Meta:
        abstract = False
        app_label = "system"
        verbose_name = 'Shell Environment: bash'
        verbose_name_plural = 'Shell Environments: bash'

    # @task
    def install_pkgs(self, os_pkgs=()):
        """Assert that every binary named in ``os_pkgs`` resolves via ``bin_path``.

        Nothing is installed; this only checks availability. Returns True
        when all checks pass, otherwise an AssertionError is raised.
        """
        assert self.is_valid, 'Bash environment is not available on this host'

        # all() stops at the first binary that cannot be resolved.
        assert all(bin_path(pkg_name) for pkg_name in os_pkgs)

        return True
|
||||||
|
|
||||||
|
class PythonEnvironmentDependency(ArchiveBoxBaseDependency):
    """Dependency record describing the ``python3`` interpreter on the host."""

    singleton_instance_id = 1

    id = models.AutoField(primary_key=True)

    # -- identity --
    NAME = "PYTHON"
    LABEL = 'Python'
    REQUIRED = True

    # -- what this dependency itself needs --
    PARENT_DEPENDENCIES = []

    BIN_DEPENDENCIES = ["python3"]
    APT_DEPENDENCIES = []
    BREW_DEPENDENCIES = []
    PIP_DEPENDENCIES = []
    NPM_DEPENDENCIES = []

    # -- defaults feeding the model fields below --
    DEFAULT_BINARY = "python3"
    DEFAULT_START_CMD = None
    DEFAULT_STOP_CMD = None
    DEFAULT_PID_FILE = None
    DEFAULT_ARGS = "-c"
    VERSION_CMD = "{BINARY} --version"

    # -- persisted configuration --
    ENABLED = models.BooleanField(default=True, editable=not REQUIRED)
    BINARY = models.CharField(max_length=255, default=DEFAULT_BINARY)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS)

    # START_CMD = models.CharField(max_length=255, default=DEFAULT_START_CMD)
    # WORKERS = models.IntegerField(default=1)

    class Meta:
        abstract = False
        app_label = "system"
        verbose_name = 'Shell Environment: python3'
|
||||||
|
|
||||||
|
class NodeJSEnvironmentDependency(ArchiveBoxBaseDependency, SingletonModel):
    """Singleton dependency record describing the ``node`` runtime on the host."""

    singleton_instance_id = 1

    id = models.AutoField(primary_key=True)

    # -- identity --
    NAME = "NODEJS"
    LABEL = 'NodeJS'
    REQUIRED = True

    # -- what this dependency itself needs --
    PARENT_DEPENDENCIES = []

    BIN_DEPENDENCIES = ["node"]
    APT_DEPENDENCIES = []
    BREW_DEPENDENCIES = []
    PIP_DEPENDENCIES = []
    NPM_DEPENDENCIES = []

    # -- defaults feeding the model fields below --
    DEFAULT_BINARY = "node"
    DEFAULT_START_CMD = None
    DEFAULT_STOP_CMD = None
    DEFAULT_PID_FILE = None
    DEFAULT_ARGS = "-c"
    VERSION_CMD = "{BINARY} --version"

    # -- persisted configuration --
    ENABLED = models.BooleanField(default=True, editable=True)
    BINARY = models.CharField(max_length=255, default=DEFAULT_BINARY)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS)

    # START_CMD = models.CharField(max_length=255, default=DEFAULT_START_CMD)
    # WORKERS = models.IntegerField(default=1)

    class Meta:
        abstract = False
        app_label = "system"
        verbose_name = 'Shell Environment: node'
|
||||||
|
|
||||||
|
|
||||||
|
class AptEnvironmentDependency(ArchiveBoxBaseDependency, SingletonModel):
    """Singleton dependency record for the ``apt-get`` package manager."""

    singleton_instance_id = 1

    id = models.AutoField(primary_key=True)

    NAME = 'APT'
    LABEL = "apt"
    REQUIRED = False

    PARENT_DEPENDENCIES = ['BashEnvironmentDependency']

    BIN_DEPENDENCIES = ['apt-get']
    APT_DEPENDENCIES = []
    BREW_DEPENDENCIES = []
    PIP_PACKAGES = []
    NPM_PACKAGES = []

    DEFAULT_BINARY = 'apt-get'
    DEFAULT_START_CMD = None
    DEFAULT_STOP_CMD = None
    DEFAULT_PID_FILE = None
    DEFAULT_ARGS = '-qq'

    ENABLED = models.BooleanField(default=True, editable=not REQUIRED)
    BINARY = models.CharField(max_length=255, default=DEFAULT_BINARY)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS)

    class Meta:
        abstract = False
        app_label = 'system'
        verbose_name = "Package Manager: apt"

    # @task
    def install_pkgs(self, apt_pkgs=()):
        """Refresh the apt package lists, then install each package in ``apt_pkgs``.

        Returns:
            True once every command has exited successfully.

        Raises:
            AssertionError: if ``self.is_valid`` is falsy.
            subprocess.CalledProcessError: if any apt-get invocation fails.
        """
        # The bare `run(cmd=...)` used before was never imported in this
        # module (NameError); the stdlib call is used instead.
        import subprocess

        assert self.is_valid, 'Apt environment is not available on this host'

        # with huey.lock_task('apt-install'):

        subprocess.run([self.DEFAULT_BINARY, '-qq', 'update'], check=True)
        for apt_package in apt_pkgs:
            subprocess.run([self.DEFAULT_BINARY, 'install', '-y', apt_package], check=True)

        return True
|
||||||
|
|
||||||
|
class BrewEnvironmentDependency(ArchiveBoxBaseDependency, SingletonModel):
    """Singleton dependency record for the Homebrew (``brew``) package manager."""

    singleton_instance_id = 1

    id = models.AutoField(primary_key=True)

    NAME = 'BREW'
    LABEL = "homebrew"
    REQUIRED = False

    PARENT_DEPENDENCIES = ['BashEnvironmentDependency']

    BIN_DEPENDENCIES = ['brew']
    APT_DEPENDENCIES = []
    BREW_DEPENDENCIES = []
    PIP_PACKAGES = []
    NPM_PACKAGES = []

    DEFAULT_BINARY = 'brew'
    DEFAULT_START_CMD = None
    DEFAULT_STOP_CMD = None
    DEFAULT_PID_FILE = None
    DEFAULT_ARGS = ''

    ENABLED = models.BooleanField(default=True, editable=True)
    BINARY = models.CharField(max_length=255, default=DEFAULT_BINARY)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS)

    class Meta:
        abstract = False
        app_label = 'system'
        verbose_name = "Package Manager: brew"

    # @task
    def install_pkgs(self, brew_pkgs=()):
        """Run ``brew update``, then install each formula in ``brew_pkgs``.

        Returns:
            True once every command has exited successfully.

        Raises:
            AssertionError: if ``self.is_valid`` is falsy.
            subprocess.CalledProcessError: if any brew invocation fails.
        """
        # The bare `run(cmd=...)` used before was never imported in this
        # module (NameError); the stdlib call is used instead.
        import subprocess

        # Message typo fixed ("Brw" -> "Brew").
        assert self.is_valid, 'Brew environment is not available on this host'

        subprocess.run([self.DEFAULT_BINARY, 'update'], check=True)

        for brew_pkg in brew_pkgs:
            subprocess.run([self.DEFAULT_BINARY, 'install', brew_pkg], check=True)

        return True
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class PipEnvironmentDependency(ArchiveBoxBaseDependency, SingletonModel):
    """Singleton dependency record for the ``pip3`` package manager."""

    singleton_instance_id = 1

    id = models.AutoField(primary_key=True)

    NAME = 'PIP'
    LABEL = "pip"
    REQUIRED = False

    PARENT_DEPENDENCIES = ['BashEnvironmentDependency']

    BIN_DEPENDENCIES = ['python3', 'pip3']
    # `pip3` is the binary name; the Debian/Ubuntu package providing it is
    # `python3-pip`.
    APT_DEPENDENCIES = ['python3.11', 'python3-pip', 'pipx']
    BREW_DEPENDENCIES = ['python@3.11', 'pipx']
    PIP_PACKAGES = ['setuptools', 'pipx']
    NPM_PACKAGES = []

    DEFAULT_BINARY = 'pip3'
    DEFAULT_START_CMD = None
    DEFAULT_STOP_CMD = None
    DEFAULT_PID_FILE = None
    DEFAULT_ARGS = ''
    VERSION_CMD = '{BINARY} --version'

    ENABLED = models.BooleanField(default=True, editable=True)
    BINARY = models.CharField(max_length=255, default=DEFAULT_BINARY)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS)

    class Meta:
        abstract = False
        app_label = 'system'
        verbose_name = "Package Manager: pip"

    # @task
    def install_pkgs(self, pip_pkgs=()):
        """Install or upgrade each package in ``pip_pkgs`` with pip.

        Returns:
            True once every command has exited successfully.

        Raises:
            AssertionError: if ``self.is_valid`` is falsy.
            subprocess.CalledProcessError: if any pip invocation fails.
        """
        # The bare `run(cmd=...)` used before was never imported in this
        # module (NameError); the stdlib call is used instead.
        import subprocess

        assert self.is_valid, 'Pip environment is not available on this host'

        for pip_pkg in pip_pkgs:
            # pip has no `--update` option; the flag is `--upgrade`.
            subprocess.run(
                [self.DEFAULT_BINARY, 'install', '--upgrade', '--ignore-installed', pip_pkg],
                check=True,
            )

        return True
|
||||||
|
|
||||||
|
|
||||||
|
class NPMEnvironmentDependency(ArchiveBoxBaseDependency, SingletonModel):
    """Singleton dependency record for the ``npm`` package manager."""

    singleton_instance_id = 1

    id = models.AutoField(primary_key=True)

    # Previously 'NODEJS' / "NodeJS", duplicating NodeJSEnvironmentDependency.
    NAME = 'NPM'
    LABEL = "npm"
    REQUIRED = False

    PARENT_DEPENDENCIES = ['BashEnvironmentDependency']

    BIN_DEPENDENCIES = ['node', 'npm']
    # The Debian/Ubuntu package that provides `node` is named `nodejs`.
    APT_DEPENDENCIES = ['nodejs', 'npm']
    BREW_DEPENDENCIES = ['node', 'npm']
    PIP_PACKAGES = []
    NPM_PACKAGES = ['npm']

    # NOTE(review): this default still points at `node`, matching migration
    # 0001; switching it to 'npm' needs a follow-up migration.
    DEFAULT_BINARY = 'node'
    DEFAULT_START_CMD = None
    DEFAULT_STOP_CMD = None
    DEFAULT_PID_FILE = None
    DEFAULT_ARGS = ''
    VERSION_CMD = '{BINARY} --version'

    ENABLED = models.BooleanField(default=True, editable=True)
    BINARY = models.CharField(max_length=255, default=DEFAULT_BINARY)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS)

    class Meta:
        abstract = False
        app_label = 'system'
        verbose_name = "Package Manager: npm"

    # @task
    def install_pkgs(self, npm_pkgs=()):
        """Install each package in ``npm_pkgs`` with ``npm install``.

        Returns:
            True once every command has exited successfully.

        Raises:
            AssertionError: if ``self.is_valid`` is falsy.
            subprocess.CalledProcessError: if any npm invocation fails.
        """
        # The bare `run(cmd=...)` used before was never imported in this
        # module (NameError); the stdlib call is used instead.
        import subprocess

        assert self.is_valid, 'NPM environment is not available on this host'

        for npm_pkg in npm_pkgs:
            # `node install <pkg>` would try to execute a script called
            # "install"; package installation is an npm subcommand.
            subprocess.run(['npm', 'install', npm_pkg], check=True)

        return True
|
||||||
|
|
||||||
|
|
||||||
|
class DjangoDependency(ArchiveBoxBaseDependency, SingletonModel):
    """Singleton dependency record for the Django package imported by this process."""

    singleton_instance_id = 1

    id = models.AutoField(primary_key=True)

    # -- identity --
    NAME = "DJANGO"
    LABEL = 'Django'
    REQUIRED = True

    # -- what this dependency itself needs --
    PARENT_DEPENDENCIES = []

    BIN_DEPENDENCIES = ["django-admin.py"]
    APT_DEPENDENCIES = []
    BREW_DEPENDENCIES = []
    PIP_PACKAGES = ["django==3.1.14"]
    NPM_PACKAGES = []

    # -- defaults feeding the model fields below --
    DEFAULT_BINARY = "django-admin.py"
    DEFAULT_START_CMD = "archivebox server 0.0.0.0:8000"
    DEFAULT_PID_FILE = "logs/{NAME}_WORKER.pid"
    DEFAULT_STOP_CMD = 'kill "$(<{PID_FILE})"'
    DEFAULT_ARGS = []
    VERSION_CMD = "{BINARY} --version"

    # -- persisted configuration (none of it editable) --
    ENABLED = models.BooleanField(default=True, editable=False)
    BINARY = models.CharField(max_length=255, default=DEFAULT_BINARY, editable=False)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS, editable=False)

    class Meta:
        abstract = False
        app_label = "system"
        verbose_name = 'Internal Dependency: django'

    @cached_property
    def bin_path(self):
        """Filesystem location of the imported ``django`` package's source file."""
        django_source_file = inspect.getfile(django)
        return django_source_file

    @cached_property
    def bin_version(self):
        """First three components of ``django.VERSION`` joined with dots."""
        version_parts = django.VERSION[:3]
        return '.'.join(map(str, version_parts))
|
||||||
|
|
||||||
|
|
||||||
|
class SQLiteDependency(ArchiveBoxBaseDependency, SingletonModel):
    """Singleton dependency record for the SQLite library bundled with Python."""

    singleton_instance_id = 1

    id = models.AutoField(primary_key=True)

    NAME = 'SQLITE'
    LABEL = "SQLite"
    REQUIRED = True

    PARENT_DEPENDENCIES = []

    BIN_DEPENDENCIES = []
    APT_DEPENDENCIES = []
    BREW_DEPENDENCIES = []
    PIP_PACKAGES = []
    NPM_PACKAGES = []

    DEFAULT_BINARY = 'sqlite3'
    DEFAULT_START_CMD = None
    DEFAULT_STOP_CMD = None
    DEFAULT_PID_FILE = None
    DEFAULT_ARGS = []
    VERSION_CMD = 'python3 -c ""'

    ENABLED = models.BooleanField(default=True, editable=False)
    BINARY = models.CharField(max_length=255, default=DEFAULT_BINARY, editable=False)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS, editable=False)

    class Meta:
        abstract = False
        app_label = 'system'
        verbose_name = "Internal Dependency: sqlite3"

    @cached_property
    def bin_path(self):
        """Filesystem location of the imported ``sqlite3.dbapi2`` module."""
        return inspect.getfile(sqlite3)

    @cached_property
    def bin_version(self):
        """Version string of the SQLite library this interpreter is linked against."""
        # `sqlite3.version` is the version of the Python adapter module
        # (deprecated, and unrelated to the database engine); the engine's
        # own version is exposed as `sqlite_version`.
        return sqlite3.sqlite_version
|
||||||
|
|
||||||
|
class ArchiveBoxDependency(ArchiveBoxBaseDependency, SingletonModel):
    """Singleton record describing the ArchiveBox package/CLI itself.

    Inherits ``SingletonModel`` like the sibling ``SQLiteDependency`` so that
    ``singleton_instance_id`` below actually takes effect; without that base
    the attribute was never read.
    """
    # Primary key used for the single row of this singleton model.
    singleton_instance_id = 1

    id = models.AutoField(primary_key=True)

    NAME = 'ARCHIVEBOX'
    LABEL = "ArchiveBox"
    REQUIRED = True

    # Referenced by class name (strings) rather than by class object.
    PARENT_DEPENDENCIES = [
        'PipEnvironmentDependency',
        'DjangoDependency',
        'SQLiteDependency',
    ]

    BIN_DEPENDENCIES = ['archivebox']
    APT_DEPENDENCIES = []
    BREW_DEPENDENCIES = []
    PIP_PACKAGES = ['archivebox']
    NPM_PACKAGES = []

    DEFAULT_BINARY = 'archivebox'
    DEFAULT_START_CMD = '{BINARY} server 0.0.0.0:8000'
    DEFAULT_ARGS = []
    VERSION_CMD = 'archivebox --version'

    ENABLED = models.BooleanField(default=True, editable=False)
    BINARY = models.CharField(max_length=255, default=DEFAULT_BINARY, editable=False)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS, editable=False)

    class Meta:
        abstract = False
        app_label = 'system'
        verbose_name = "Internal Dependency: archivebox"

    @cached_property
    def bin_path(self):
        """Return the path of the running entry point, else a looked-up ``archivebox`` path.

        Inside this method body ``bin_path(...)`` resolves to the module-level
        name, not to this property -- presumably a lookup helper imported at
        the top of the file (TODO confirm).
        """
        return sys.argv[0] or bin_path('archivebox')

    @cached_property
    def bin_version(self):
        """Return the module-level ``VERSION`` value."""
        # return config['VERSION']
        return VERSION
|
||||||
|
|
3
archivebox/plugins/system/settings.py
Normal file
3
archivebox/plugins/system/settings.py
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from plugins.defaults import register_plugin_settings
|
118
archivebox/templates/static/sw.js
Normal file
118
archivebox/templates/static/sw.js
Normal file
File diff suppressed because one or more lines are too long
3390
archivebox/templates/static/ui.js
Normal file
3390
archivebox/templates/static/ui.js
Normal file
File diff suppressed because one or more lines are too long
|
@ -271,7 +271,11 @@ def get_headers(url: str, timeout: int=None) -> str:
|
||||||
|
|
||||||
return pyjson.dumps(
|
return pyjson.dumps(
|
||||||
{
|
{
|
||||||
|
'URL': url,
|
||||||
'Status-Code': response.status_code,
|
'Status-Code': response.status_code,
|
||||||
|
'Elapsed': response.elapsed,
|
||||||
|
'Encoding': response.encoding,
|
||||||
|
'Apparent-Encoding': response.apparent_encoding,
|
||||||
**dict(response.headers),
|
**dict(response.headers),
|
||||||
},
|
},
|
||||||
indent=4,
|
indent=4,
|
||||||
|
|
|
@ -64,7 +64,7 @@ if [[ -d "$DATA_DIR/archive" ]]; then
|
||||||
rm -f "$DATA_DIR/archive/.permissions_test_safe_to_delete"
|
rm -f "$DATA_DIR/archive/.permissions_test_safe_to_delete"
|
||||||
# echo "[√] Permissions are correct"
|
# echo "[√] Permissions are correct"
|
||||||
else
|
else
|
||||||
# the only time this fails is if the host filesystem doesn't allow us to write as root (e.g. some NFS mapall/maproot problems, connection issues, drive disappeared, etc.)
|
# the only time this fails is if the host filesystem doesn't allow us to write as root (e.g. some NFS mapall/maproot problems, connection issues, drive disappeared, etc.)
|
||||||
echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data/archive dir (currently owned by $(stat -c '%u' "$DATA_DIR/archive"):$(stat -c '%g' "$DATA_DIR/archive")." > /dev/stderr
|
echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data/archive dir (currently owned by $(stat -c '%u' "$DATA_DIR/archive"):$(stat -c '%g' "$DATA_DIR/archive")." > /dev/stderr
|
||||||
echo -e " Change ./data to be owned by PUID=$PUID PGID=$PGID on the host and retry:" > /dev/stderr
|
echo -e " Change ./data to be owned by PUID=$PUID PGID=$PGID on the host and retry:" > /dev/stderr
|
||||||
echo -e " \$ chown -R $PUID:$PGID ./data\n" > /dev/stderr
|
echo -e " \$ chown -R $PUID:$PGID ./data\n" > /dev/stderr
|
||||||
|
@ -89,7 +89,8 @@ if ! chown $PUID:$PGID "$DATA_DIR"/* > /dev/null 2>&1; then
|
||||||
find "$DATA_DIR" -type d -not -path "$DATA_DIR/archive*" -exec chown $PUID:$PGID {} \; > /dev/null 2>&1
|
find "$DATA_DIR" -type d -not -path "$DATA_DIR/archive*" -exec chown $PUID:$PGID {} \; > /dev/null 2>&1
|
||||||
find "$DATA_DIR" -type f -not -path "$DATA_DIR/archive/*" -exec chown $PUID:$PGID {} \; > /dev/null 2>&1
|
find "$DATA_DIR" -type f -not -path "$DATA_DIR/archive/*" -exec chown $PUID:$PGID {} \; > /dev/null 2>&1
|
||||||
fi
|
fi
|
||||||
|
mkdir -p /var/spool/cron/crontabs
|
||||||
|
chown -R $PUID:$PGID /var/spool/cron/crontabs > /dev/null 2>&1 &
|
||||||
|
|
||||||
# also chown BROWSERS_DIR because otherwise 'archivebox setup' wont be able to 'playwright install chromium' at runtime
|
# also chown BROWSERS_DIR because otherwise 'archivebox setup' wont be able to 'playwright install chromium' at runtime
|
||||||
export PLAYWRIGHT_BROWSERS_PATH="${PLAYWRIGHT_BROWSERS_PATH:-/browsers}"
|
export PLAYWRIGHT_BROWSERS_PATH="${PLAYWRIGHT_BROWSERS_PATH:-/browsers}"
|
||||||
|
@ -191,9 +192,11 @@ if [[ "$1" == /* || "$1" == "bash" || "$1" == "sh" || "$1" == "echo" || "$1" ==
|
||||||
# "docker run archivebox /bin/bash -c '...'"
|
# "docker run archivebox /bin/bash -c '...'"
|
||||||
# "docker run archivebox cat /VERSION.txt"
|
# "docker run archivebox cat /VERSION.txt"
|
||||||
exec gosu "$PUID" /bin/bash -c "exec $(printf ' %q' "$@")"
|
exec gosu "$PUID" /bin/bash -c "exec $(printf ' %q' "$@")"
|
||||||
|
# WARNING: make sure to test extensively if you change this line, there are many edge-cases with nested quotes, special character, etc.
|
||||||
# printf requotes shell parameters properly https://stackoverflow.com/a/39463371/2156113
|
# printf requotes shell parameters properly https://stackoverflow.com/a/39463371/2156113
|
||||||
# gosu spawns an ephemeral bash process owned by archivebox user (bash wrapper is needed to load env vars, PATH, and setup terminal TTY)
|
# gosu spawns an ephemeral bash process owned by archivebox user (bash wrapper is needed to load env vars, PATH, and setup terminal TTY)
|
||||||
# outermost exec hands over current process ID to inner bash process, inner exec hands over inner bash PID to user's command
|
# outermost exec hands over current process ID to inner bash process, inner exec hands over inner bash PID to user's command
|
||||||
|
# - https://github.com/ArchiveBox/ArchiveBox/issues/1191
|
||||||
else
|
else
|
||||||
# handle "docker run archivebox add some subcommand --with=args abc" by calling archivebox to run as args as CLI subcommand
|
# handle "docker run archivebox add some subcommand --with=args abc" by calling archivebox to run as args as CLI subcommand
|
||||||
# e.g. "docker run archivebox help"
|
# e.g. "docker run archivebox help"
|
||||||
|
|
38
bin/docker_ipc_listener.py
Executable file
38
bin/docker_ipc_listener.py
Executable file
|
@ -0,0 +1,38 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
# Allow another docker container to run commands on this container
|
||||||
|
# This is the script to run on the server container.
|
||||||
|
# The client can connect and run a command like so:
|
||||||
|
# $ echo whoami | nc servercontainername 2222
|
||||||
|
# root
|
||||||
|
|
||||||
|
import socket
|
||||||
|
import subprocess as sp
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
LISTEN_PORT = 2222

# SECURITY NOTE(review): every line received on this port is executed through
# a shell with no authentication. Only expose it on a trusted, private
# container network -- never publish the port to the host or the internet.
s1 = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s1.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
s1.bind(("0.0.0.0", LISTEN_PORT))
s1.listen(1)
print(f"Listening for shell commands on 0.0.0.0:{LISTEN_PORT}", flush=True)

# Serve one command per connection, forever.
while True:
    conn, addr = s1.accept()

    # `with conn` closes the client socket on every path, including the
    # empty-read case that previously leaked the old connection.
    with conn:
        try:
            cmd = conn.recv(1024).decode(errors="replace")
            if not cmd:
                # Client connected and hung up without sending anything.
                continue

            timestamp = datetime.now().isoformat()
            # `addr` is the peer address returned by accept(); getsockname()
            # would report this server's own address instead of the client's.
            client_ip, client_port = addr
            print(f'\n[{timestamp}][{client_ip}:{client_port}] $', cmd)

            # stdin is /dev/null so commands that read stdin get EOF instead
            # of blocking forever on a pipe nobody writes to.
            with sp.Popen(cmd, shell=True, stdout=sp.PIPE, stderr=sp.STDOUT, stdin=sp.DEVNULL, bufsize=1, universal_newlines=True) as p:
                # Stream output line by line to both the local log and the client.
                for line in p.stdout:
                    print(line.strip(), flush=True)
                    conn.sendall(line.encode("utf-8"))
        except OSError as err:
            # A client that disconnects mid-command must not kill the listener.
            print(f'[!] Connection error: {err}', flush=True)
|
|
@ -39,7 +39,6 @@ services:
|
||||||
# dns:
|
# dns:
|
||||||
# - 172.20.0.53
|
# - 172.20.0.53
|
||||||
|
|
||||||
|
|
||||||
######## Optional Addons: tweak examples below as needed for your specific use case ########
|
######## Optional Addons: tweak examples below as needed for your specific use case ########
|
||||||
|
|
||||||
### This optional container runs any scheduled tasks in the background, add new tasks like so:
|
### This optional container runs any scheduled tasks in the background, add new tasks like so:
|
||||||
|
@ -188,6 +187,13 @@ services:
|
||||||
# - ./wireguard.conf:/config/wg0.conf:ro
|
# - ./wireguard.conf:/config/wg0.conf:ro
|
||||||
|
|
||||||
|
|
||||||
|
### Example: Run browsertrix in parallel with ArchiveBox
|
||||||
|
|
||||||
|
# browsertrix:
|
||||||
|
# image: webrecorder/browsertrix-crawler:latest
|
||||||
|
# volumes:
|
||||||
|
# - ./browsertrix:/crawls:z
|
||||||
|
|
||||||
### Example: Run PYWB in parallel and auto-import WARCs from ArchiveBox
|
### Example: Run PYWB in parallel and auto-import WARCs from ArchiveBox
|
||||||
|
|
||||||
# pywb:
|
# pywb:
|
||||||
|
|
|
@ -7,7 +7,12 @@ wsgi-file = archivebox/core/wsgi.py
|
||||||
processes = 4
|
processes = 4
|
||||||
threads = 1
|
threads = 1
|
||||||
stats = 127.0.0.1:9191
|
stats = 127.0.0.1:9191
|
||||||
static-map /static=./archivebox/templates/static
|
static-map = /static=./archivebox/templates/static
|
||||||
|
static-map = /static=./archivebox/plugins/replaywebpage/static
|
||||||
|
static-map = /archive=$(PWD)/archive
|
||||||
|
; serve index.html when a directory under a static mapping is requested
static-index = index.html
|
||||||
harakiri = 172800
|
harakiri = 172800
|
||||||
post-buffering = 1
|
post-buffering = 1
|
||||||
disable-logging = True
|
disable-logging = True
|
||||||
|
check-static
|
||||||
|
honour-range = True
|
23
package.json
23
package.json
|
@ -1,13 +1,14 @@
|
||||||
{
|
{
|
||||||
"name": "archivebox",
|
"name": "archivebox",
|
||||||
"version": "0.8.0",
|
"version": "0.7.3",
|
||||||
"description": "ArchiveBox: The self-hosted internet archive",
|
"description": "ArchiveBox: The self-hosted internet archive",
|
||||||
"author": "Nick Sweeting <archivebox-npm@sweeting.me>",
|
"author": "Nick Sweeting <archivebox-npm@sweeting.me>",
|
||||||
"repository": "github:ArchiveBox/ArchiveBox",
|
"repository": "github:ArchiveBox/ArchiveBox",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies":
|
||||||
"@postlight/parser": "^2.2.3",
|
{
|
||||||
"readability-extractor": "github:ArchiveBox/readability-extractor",
|
"@postlight/parser": "^2.2.3",
|
||||||
"single-file-cli": "^1.1.54"
|
"readability-extractor": "github:ArchiveBox/readability-extractor",
|
||||||
}
|
"single-file-cli": "^1.1.54"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
29
pdm.lock
29
pdm.lock
|
@ -359,6 +359,19 @@ files = [
|
||||||
{file = "django_ninja-1.1.0.tar.gz", hash = "sha256:87bff046416a2653ed2fbef1408e101292bf8170684821bac82accfd73bef059"},
|
{file = "django_ninja-1.1.0.tar.gz", hash = "sha256:87bff046416a2653ed2fbef1408e101292bf8170684821bac82accfd73bef059"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "django-solo"
|
||||||
|
version = "2.0.0"
|
||||||
|
requires_python = ">=3.6"
|
||||||
|
summary = "Django Solo helps working with singletons"
|
||||||
|
dependencies = [
|
||||||
|
"django>=2.2",
|
||||||
|
]
|
||||||
|
files = [
|
||||||
|
{file = "django-solo-2.0.0.tar.gz", hash = "sha256:7c6dbe04ae858a4645b580ec83a31a960a067ad4525d8227cca50b7fc5983a62"},
|
||||||
|
{file = "django_solo-2.0.0-py3-none-any.whl", hash = "sha256:9046eca738f2ed64dbef38c2107a02af1065a8899b4f9fabf61b06b8325de1b4"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "exceptiongroup"
|
name = "exceptiongroup"
|
||||||
version = "1.2.1"
|
version = "1.2.1"
|
||||||
|
@ -902,22 +915,12 @@ files = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "setuptools"
|
name = "setuptools"
|
||||||
version = "69.5.1"
|
version = "69.0.3"
|
||||||
requires_python = ">=3.8"
|
requires_python = ">=3.8"
|
||||||
summary = "Easily download, build, install, upgrade, and uninstall Python packages"
|
summary = "Easily download, build, install, upgrade, and uninstall Python packages"
|
||||||
groups = ["default"]
|
|
||||||
files = [
|
files = [
|
||||||
{file = "setuptools-69.5.1-py3-none-any.whl", hash = "sha256:c636ac361bc47580504644275c9ad802c50415c7522212252c033bd15f301f32"},
|
{file = "setuptools-69.0.3-py3-none-any.whl", hash = "sha256:385eb4edd9c9d5c17540511303e39a147ce2fc04bc55289c322b9e5904fe2c05"},
|
||||||
{file = "setuptools-69.5.1.tar.gz", hash = "sha256:6c1fccdac05a97e598fb0ae3bbed5904ccb317337a51139dcd51453611bbb987"},
|
{file = "setuptools-69.0.3.tar.gz", hash = "sha256:be1af57fc409f93647f2e8e4573a142ed38724b8cdd389706a867bb4efcf1e78"},
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "sgmllib3k"
|
|
||||||
version = "1.0.0"
|
|
||||||
summary = "Py3k port of sgmllib."
|
|
||||||
groups = ["default"]
|
|
||||||
files = [
|
|
||||||
{file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"},
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
|
@ -16,6 +16,7 @@ dependencies = [
|
||||||
"setuptools>=69.5.1",
|
"setuptools>=69.5.1",
|
||||||
"django>=4.2.0,<5.0",
|
"django>=4.2.0,<5.0",
|
||||||
"django-ninja>=1.1.0",
|
"django-ninja>=1.1.0",
|
||||||
|
"django-solo>=2.0.0",
|
||||||
"django-extensions>=3.2.3",
|
"django-extensions>=3.2.3",
|
||||||
"mypy-extensions>=1.0.0",
|
"mypy-extensions>=1.0.0",
|
||||||
|
|
||||||
|
@ -123,7 +124,9 @@ test = [
|
||||||
lint = [
|
lint = [
|
||||||
"flake8",
|
"flake8",
|
||||||
"mypy",
|
"mypy",
|
||||||
"django-stubs",
|
"django-stubs[compatible-mypy]>=4.2.7",
|
||||||
|
"types-requests>=2.31.0.20240125",
|
||||||
|
"pudb>=2024.1",
|
||||||
]
|
]
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
@ -133,6 +136,21 @@ build-backend = "pdm.backend"
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
archivebox = "archivebox.cli:main"
|
archivebox = "archivebox.cli:main"
|
||||||
|
|
||||||
|
[tool.pyright]
|
||||||
|
include = ["archivebox"]
|
||||||
|
exclude = ["data", "data2", "data3", "data4", "data5", "pip_dist", "brew_dist", "dist", "vendor", "migrations", "tests"]
|
||||||
|
|
||||||
|
[tool.mypy]
|
||||||
|
mypy_path = "archivebox"
|
||||||
|
explicit_package_bases = true
|
||||||
|
check_untyped_defs = true
|
||||||
|
plugins = ["mypy_django_plugin.main"]
|
||||||
|
# TODO: remove this eventually https://github.com/hauntsaninja/no_implicit_optional
|
||||||
|
implicit_optional = true
|
||||||
|
|
||||||
|
[tool.django-stubs]
|
||||||
|
django_settings_module = "core.settings"
|
||||||
|
strict_settings = false
|
||||||
|
|
||||||
[tool.pdm.scripts]
|
[tool.pdm.scripts]
|
||||||
lint = "./bin/lint.sh"
|
lint = "./bin/lint.sh"
|
||||||
|
@ -142,19 +160,6 @@ test = "./bin/test.sh"
|
||||||
[tool.pytest.ini_options]
|
[tool.pytest.ini_options]
|
||||||
testpaths = [ "tests" ]
|
testpaths = [ "tests" ]
|
||||||
|
|
||||||
[tool.mypy]
|
|
||||||
mypy_path = "archivebox"
|
|
||||||
namespace_packages = true
|
|
||||||
explicit_package_bases = true
|
|
||||||
# follow_imports = "silent"
|
|
||||||
# ignore_missing_imports = true
|
|
||||||
# disallow_incomplete_defs = true
|
|
||||||
# disallow_untyped_defs = true
|
|
||||||
# disallow_untyped_decorators = true
|
|
||||||
# exclude = "pdm/(pep582/|models/in_process/.+\\.py)"
|
|
||||||
plugins = ["mypy_django_plugin.main"]
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
Homepage = "https://github.com/ArchiveBox/ArchiveBox"
|
Homepage = "https://github.com/ArchiveBox/ArchiveBox"
|
||||||
|
|
Loading…
Reference in a new issue