1
0
Fork 0
mirror of synced 2024-09-30 09:06:19 +13:00

add defaults and system plugins

This commit is contained in:
Nick Sweeting 2024-01-24 00:07:07 -08:00
parent 0c878eb754
commit d0e3c9502e
16 changed files with 1131 additions and 146 deletions

View file

@ -59,12 +59,17 @@ INSTALLED_APPS = [
'django.contrib.messages',
'django.contrib.staticfiles',
'django.contrib.admin',
'solo',
'core',
# Plugins
'plugins.replaywebpage',
'plugins.gallerydl',
'plugins.defaults',
'plugins.system',
# 'plugins.replaywebpage',
# 'plugins.gallerydl',
# 'plugins.browsertrix',
# 'plugins.playwright',
# ...
@ -87,8 +92,9 @@ STATICFILES_DIRS = [
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'static'),
# Plugins
str(Path(PACKAGE_DIR) / 'plugins/replaywebpage/static'),
str(Path(PACKAGE_DIR) / 'plugins/gallerydl/static'),
# str(Path(PACKAGE_DIR) / 'plugins/defaults/static'),
# str(Path(PACKAGE_DIR) / 'plugins/replaywebpage/static'),
# str(Path(PACKAGE_DIR) / 'plugins/gallerydl/static'),
# str(Path(PACKAGE_DIR) / 'plugins/browsertrix/static'),
# str(Path(PACKAGE_DIR) / 'plugins/playwright/static'),
# ...
@ -107,8 +113,10 @@ TEMPLATE_DIRS = [
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME),
# Plugins
str(Path(PACKAGE_DIR) / 'plugins/replaywebpage/templates'),
str(Path(PACKAGE_DIR) / 'plugins/gallerydl/templates'),
# added by plugins.<PluginName>.apps.<AppName>.ready -> .settings.register_plugin_settings
# str(Path(PACKAGE_DIR) / 'plugins/defaults/templates'),
# str(Path(PACKAGE_DIR) / 'plugins/replaywebpage/templates'),
# str(Path(PACKAGE_DIR) / 'plugins/gallerydl/templates'),
# str(Path(PACKAGE_DIR) / 'plugins/browsertrix/templates'),
# str(Path(PACKAGE_DIR) / 'plugins/playwright/templates'),
# ...

View file

@ -0,0 +1,21 @@
from django.contrib import admin
from solo.admin import SingletonModelAdmin
from .models import (
ArchiveBoxDefaultDependency,
ArchiveBoxDefaultExtractor,
)
class DependencyAdmin(SingletonModelAdmin):
    """Admin page for singleton dependency-config models.

    Every name listed below is rendered read-only in the admin form.
    """

    readonly_fields = (
        'REQUIRED',
        'ENABLED',
        'BINARY',
        'ARGS',
        'bin_path',
        'bin_version',
        'is_valid',
        'is_enabled',
    )
class ExtractorAdmin(SingletonModelAdmin):
    """Admin page for singleton extractor-config models (no customisation yet)."""

    # readonly_fields = ('REQUIRED', 'ENABLED', 'BINARY', 'ARGS', 'bin_path', 'bin_version', 'is_valid', 'is_enabled')
# Debug trace printed once, when this admin module is imported.
print('DefaultsPluginConfig.admin')
# Register both default singleton config models with the Django admin site,
# each with its matching SingletonModelAdmin subclass.
admin.site.register(ArchiveBoxDefaultDependency, DependencyAdmin)
admin.site.register(ArchiveBoxDefaultExtractor, ExtractorAdmin)

View file

@ -0,0 +1,22 @@
# Explicitly set this module's package name (used by Python to resolve the
# relative imports below, e.g. `from .settings import ...`).
# NOTE(review): INSTALLED_APPS refers to this app as 'plugins.defaults' — confirm
# the two import paths are meant to coexist.
__package__ = 'archivebox.plugins.defaults'
from django.apps import AppConfig
class DefaultsPluginConfig(AppConfig):
    """Django application config for the ``defaults`` plugin.

    On startup (``ready``) it registers the plugin's static and template
    directories with the project settings.
    """

    # `label` is the short machine name of the app; Django documents it as an
    # identifier-style name, and it has to agree with the
    # `app_label = 'defaults'` declared on this plugin's models.  The
    # human-readable title belongs in `verbose_name`.
    label = "defaults"
    verbose_name = "ArchiveBox Defaults"
    # NOTE(review): INSTALLED_APPS lists this app as 'plugins.defaults';
    # confirm `name` should not be the full dotted path as well.
    name = "defaults"

    default_auto_field = "django.db.models.AutoField"

    def ready(self):
        """Register this plugin's static/template dirs once the app registry is ready."""
        print('plugins.defaults.apps.DefaultsPluginConfig.ready')

        # Imported lazily so that settings are only touched after Django has
        # finished configuring itself.
        from django.conf import settings

        from .settings import register_plugin_settings

        register_plugin_settings(settings, name=self.name)

View file

@ -0,0 +1,39 @@
# Generated by Django 3.1.14 on 2024-01-24 08:06
from django.db import migrations, models
# Initial schema migration for the defaults plugin: creates one table for each
# of the two concrete singleton config models.
class Migration(migrations.Migration):
# First migration of this app.
initial = True
# Does not depend on any other migration.
dependencies = [
]
operations = [
# Table for the ArchiveBoxDefaultDependency model.
migrations.CreateModel(
name='ArchiveBoxDefaultDependency',
fields=[
('ENABLED', models.BooleanField(default=True, editable=False)),
('BINARY', models.CharField(default='/bin/false', max_length=255)),
('ARGS', models.CharField(default='', max_length=255)),
# Primary key defaults to 1 (the model's singleton_instance_id).
('id', models.AutoField(default=1, primary_key=True, serialize=False)),
],
options={
'abstract': False,
},
),
# Table for the ArchiveBoxDefaultExtractor model.
migrations.CreateModel(
name='ArchiveBoxDefaultExtractor',
fields=[
('ENABLED', models.BooleanField(default=True)),
# NOTE(review): CMD and ARGS are CharFields whose recorded defaults are
# Python lists, not strings — confirm this is intended.
('CMD', models.CharField(default=['{DEPENDENCY.BINARY}', '{ARGS}', '{url}'], max_length=255)),
('ARGS', models.CharField(default=['--timeout={TIMEOUT}'], max_length=255)),
('TIMEOUT', models.CharField(default='{TIMEOUT}', max_length=255)),
('id', models.AutoField(default=1, primary_key=True, serialize=False)),
],
options={
'abstract': False,
},
),
]

View file

@ -0,0 +1,361 @@
# __package__ = 'archivebox.plugins.defaults'
import shutil
from typing import List, Dict, Any
from pathlib import Path
from django.db import models, transaction
from django.utils.functional import cached_property
from solo.models import SingletonModel
ConfigDict = Dict[str, Any]
def bin_path(binary: str) -> str | None:
return shutil.which(str(Path(binary).expanduser())) or shutil.which(str(binary)) or binary
def bin_version(bin_path: str, cmd: str | None=None) -> str | None:
return '0.0.0'
class ArchiveBoxBaseDependency(SingletonModel):
    """Abstract singleton model describing one external binary dependency.

    Class-level constants (``NAME``, ``*_DEPENDENCIES``, ``DEFAULT_*``) describe
    the dependency; the three model fields (``ENABLED``, ``BINARY``, ``ARGS``)
    hold the stored configuration.  Concrete subclasses override the
    constants and set ``Meta.abstract = False``.
    """

    singleton_instance_id = 1

    id = models.AutoField(default=singleton_instance_id, primary_key=True)

    NAME = 'DEFAULT'
    LABEL = "Default"
    REQUIRED = False

    # Dependency model classes that must be installed before this one.
    PARENT_DEPENDENCIES = []

    # Package names handed to the matching environment's install_pkgs().
    BIN_DEPENDENCIES = []
    APT_DEPENDENCIES = []
    BREW_DEPENDENCIES = []
    PIP_DEPENDENCIES = []
    NPM_DEPENDENCIES = []

    DEFAULT_BINARY = '/bin/false'
    DEFAULT_START_CMD = '/bin/false'
    DEFAULT_PID_FILE = 'logs/{NAME}_WORKER.pid'
    DEFAULT_STOP_CMD = 'kill "$(<{PID_FILE})"'
    DEFAULT_VERSION_COMMAND = '{CMD} --version'
    DEFAULT_ARGS = ''

    VERSION_CMD = '{BINARY} --version'

    ENABLED = models.BooleanField(default=True, editable=False)
    BINARY = models.CharField(max_length=255, default=DEFAULT_BINARY)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS)

    # START_CMD = models.CharField(max_length=255, default=DEFAULT_START_CMD)
    # WORKERS = models.IntegerField(default=1)

    class Meta:
        abstract = True
        app_label = 'defaults'

    def __str__(self):
        # f-string so that LABEL is interpolated rather than printed literally.
        return f"{self.LABEL} Dependency Configuration"

    def __json__(self):
        """Return a plain-dict summary of this dependency's identity and config."""
        return {
            'type': 'ArchiveBoxDependency',
            '__class__': self.__class__.__name__,
            'NAME': self.NAME,
            'LABEL': self.LABEL,
            'ENABLED': self.ENABLED,
            'BINARY': self.BINARY,
            'ARGS': self.ARGS,
            # 'START_CMD': self.START_CMD,
            # 'WORKERS': self.WORKERS,
        }

    @cached_property
    def bin_path(self):
        """Path for the configured binary (falls back to DEFAULT_BINARY when BINARY is empty)."""
        return bin_path(self.BINARY or self.DEFAULT_BINARY)

    @cached_property
    def bin_version(self):
        """Version reported for ``bin_path`` by the module-level ``bin_version`` helper."""
        return bin_version(self.bin_path, cmd=self.VERSION_CMD)

    @cached_property
    def is_valid(self):
        """True when both a binary path and a version are available."""
        return bool(self.bin_path and self.bin_version)

    @cached_property
    def is_enabled(self):
        """True when the dependency is switched on and valid."""
        return bool(self.ENABLED and self.is_valid)

    @cached_property
    def pretty_version(self):
        """Return a one-line, ANSI-coloured status summary for terminal output.

        Three states are reported: enabled+valid, enabled+invalid, disabled.
        """
        # Local import: `re` is not among this module's top-level imports.
        import re

        if self.ENABLED:
            if self.is_valid:
                color, symbol, note, version = 'green', '', 'valid', ''
                # str(): subclasses may return a non-string version value.
                parsed_version_num = re.search(r'[\d\.]+', str(self.bin_version))
                if parsed_version_num:
                    version = f'v{parsed_version_num[0]}'
            else:
                color, symbol, note, version = 'red', 'X', 'invalid', '?'
        else:
            color, symbol, note, version = 'lightyellow', '-', 'disabled', '-'

        # NOTE(review): `pretty_path` and `ANSI` are not defined or imported in
        # the visible part of this module — confirm where they come from.
        path = pretty_path(self.bin_path)

        return ' '.join((
            ANSI[color],
            symbol,
            ANSI['reset'],
            self.NAME.ljust(21),
            version.ljust(14),
            ANSI[color],
            note.ljust(8),
            ANSI['reset'],
            path.ljust(76),
        ))

    # @helper
    def install_parents(self, config=None):
        """Install every parent dependency; return ``{parent NAME: install result}``."""
        return {
            parent_dependency.NAME: parent_dependency.get_solo().install_self(config)
            for parent_dependency in self.PARENT_DEPENDENCIES
        }

    # @helper
    def install_self(self, config=None):
        """Install parents, then this dependency's packages; return its version.

        Raises:
            AssertionError: if a parent install returns a falsy result or the
                dependency is still not valid afterwards.
        """
        # Imported here because plugins.system.models itself imports this
        # module at import time; a top-level import would be circular.
        from plugins.system.models import (
            BashEnvironmentDependency,
            AptEnvironmentDependency,
            BrewEnvironmentDependency,
            PipEnvironmentDependency,
            NPMEnvironmentDependency,
        )

        # Run the installs outside of an `assert` so they still happen when
        # Python is started with -O (which strips assert statements).
        parent_results = self.install_parents(config)
        if not all(parent_results.values()):
            raise AssertionError(f'Failed to install parent dependencies of {self.NAME}')

        BashEnvironmentDependency.get_solo().install_pkgs(self.BIN_DEPENDENCIES)
        AptEnvironmentDependency.get_solo().install_pkgs(self.APT_DEPENDENCIES)
        BrewEnvironmentDependency.get_solo().install_pkgs(self.BREW_DEPENDENCIES)
        PipEnvironmentDependency.get_solo().install_pkgs(self.PIP_DEPENDENCIES)
        NPMEnvironmentDependency.get_solo().install_pkgs(self.NPM_DEPENDENCIES)

        if not self.is_valid:
            raise AssertionError(f'{self.NAME} is not valid after installation')
        return self.bin_version

    # @task
    def run(self, args, pwd=None, timeout=None, cwd=None):
        """Run the dependency's binary with *args*.

        Args:
            args: extra command-line arguments appended after the binary path.
            pwd: working directory for the process.
            timeout: timeout forwarded to the runner and the progress timer.
            cwd: accepted as an alias of *pwd* (callers in this file pass ``cwd=``).

        Returns:
            ``(proc, timer, errors)``; ``proc`` is None and ``errors`` holds the
            exception when the run raised.
        """
        errors = None
        proc = None  # stays None if the runner raises before returning
        working_dir = pwd if pwd is not None else cwd

        # NOTE(review): `TimedProgress` and the module-level `run` helper called
        # below are not imported in the visible part of this module — confirm.
        timer = TimedProgress(timeout, prefix=' ')
        try:
            proc = run(cmd=[self.bin_path, *args], pwd=working_dir, timeout=timeout)
        except Exception as err:
            errors = err
        finally:
            timer.end()

        return proc, timer, errors
# Concrete (non-abstract) dependency model that keeps every default inherited
# from ArchiveBoxBaseDependency.
class ArchiveBoxDefaultDependency(ArchiveBoxBaseDependency, SingletonModel):
singleton_instance_id = 1
# Primary key defaults to the singleton id.
id = models.AutoField(default=singleton_instance_id, primary_key=True)
class Meta:
# Overrides the base class's abstract=True.
abstract = False
app_label = 'defaults'
class ArchiveBoxBaseExtractor(SingletonModel):
    """Abstract singleton model describing one extractor's configuration.

    An extractor pairs a dependency model (``DEPENDENCY``) with command and
    argument templates, decides whether it should run for a snapshot, and
    runs the dependency to produce output in ``<snapshot_dir>/<NAME>``.
    """

    singleton_instance_id = 1

    id = models.AutoField(default=singleton_instance_id, primary_key=True)

    NAME = 'DEFAULT'
    LABEL = 'Default'

    DEFAULT_DEPENDENCY = ArchiveBoxDefaultDependency
    DEPENDENCY = DEFAULT_DEPENDENCY

    DEFAULT_ENABLED = True
    DEFAULT_CMD = ['{DEPENDENCY.BINARY}', '{ARGS}', '{url}']
    DEFAULT_ARGS = ['--timeout={TIMEOUT}']
    DEFAULT_TIMEOUT = '{TIMEOUT}'
    # DEFAULT_USER_AGENT = '{USER_AGENT}'
    # DEFAULT_COOKIES_TXT = '{COOKIES_TXT}'

    ENABLED = models.BooleanField(default=DEFAULT_ENABLED, editable=True)

    # NOTE(review): CMD and ARGS are CharFields whose defaults are Python
    # lists; the defaults are kept as-is because the generated migration
    # records them — confirm the intended storage format.
    CMD = models.CharField(max_length=255, default=DEFAULT_CMD)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS)
    TIMEOUT = models.CharField(max_length=255, default=DEFAULT_TIMEOUT)

    ALIASES = {
        'ENABLED': (f'SAVE_{NAME}', f'USE_{NAME}', f'FETCH_{NAME}'),
    }

    def __str__(self):
        return f"{self.LABEL} Extractor Configuration"

    class Meta:
        abstract = True
        verbose_name = "Default Extractor Configuration"
        app_label = 'defaults'

    @cached_property
    def dependency(self):
        """The singleton instance of this extractor's dependency model."""
        return self.DEPENDENCY.get_solo()

    def __json__(self):
        """Return a plain-dict summary of this extractor's config and status."""
        return {
            'type': 'ArchiveBoxExtractor',
            '__class__': self.__class__.__name__,
            'NAME': self.NAME,
            'LABEL': self.LABEL,
            'ENABLED': self.ENABLED,
            'DEPENDENCY': self.dependency.__json__(),
            'ARGS': self.ARGS,
            'CMD': self.CMD,
            'TIMEOUT': self.TIMEOUT,
            'is_valid': self.is_valid,
            'is_enabled': self.is_enabled,
        }

    def format_args(self, csv: List[str], **config):
        """Render the templates in *csv* with ``str.format``.

        The format namespace is layered, later layers winning on key clashes:
        *config*, this extractor's ``__json__()`` values, the same values
        prefixed with ``{NAME}_``, and finally ``DEPENDENCY`` bound to the
        dependency model instance so that attribute-style templates such as
        ``'{DEPENDENCY.BINARY}'`` resolve.

        Args:
            csv: a list of templates, or a single template string (which is
                formatted and then split with shell quoting rules).

        Returns:
            The list of formatted arguments.
        """
        import shlex

        un_prefixed_config = {**self.__json__()}       # e.g. ENABLED=True
        prefixed_config = {                            # e.g. GALLERYDL_ENABLED=True
            f'{self.NAME}_{key}': value
            for key, value in un_prefixed_config.items()
        }
        merged_config = {
            **config,                                  # e.g. TIMEOUT=60
            **un_prefixed_config,                      # e.g. ENABLED=True
            **prefixed_config,                         # e.g. GALLERYDL_ENABLED=True
            # __json__() exposes DEPENDENCY as a dict, which cannot satisfy
            # '{DEPENDENCY.BINARY}'; expose the model instance instead.
            'DEPENDENCY': self.dependency,
        }
        # NOTE(review): this extractor's own TIMEOUT/ARGS templates override
        # same-named keys in *config* because of the merge order — confirm.

        if isinstance(csv, str):
            # Iterating a string would format it one character at a time.
            return shlex.split(csv.format(**merged_config))
        return [arg.format(**merged_config) for arg in csv]

    @cached_property
    def is_valid(self):
        """True when the dependency is valid (no extractor-level checks yet)."""
        if not self.dependency.is_valid:
            return False

        # TIMEOUT must be at least 5 seconds
        # if self.TIMEOUT < 5:
        #     return False

        # assert Path(self.COOKIES_TXT).exists()
        # TODO: validate user agent with uaparser
        # TODO: validate args, cookies.txt?
        return True

    @cached_property
    def is_enabled(self):
        """Truthy when the extractor is on, valid, and its dependency is enabled."""
        return self.ENABLED and self.is_valid and self.dependency.is_enabled

    def save(self, *args, **kwargs):
        """Validate, save inside a transaction, then print a save event."""
        assert self.is_valid

        with transaction.atomic():
            result = super().save(*args, **kwargs)
            # post to message bus:
            print({
                'type': f'{self.__class__.__name__}.save',
                'diff': self.__json__(),
                'kwargs': kwargs,
            })
            # potential consumers of this event:
            #    - event logger: write to events.log
            #    - config file updater: writes to ArchiveBox.conf
            #    - supervisor: restarts relevant dependencies/extractors
            #    - etc...

        return result

    def out_dir(self, url: str, snapshot_dir: Path, config: ConfigDict):
        """Return this extractor's output directory: ``snapshot_dir / NAME``."""
        return (snapshot_dir / self.NAME)

    def create_out_dir(self, url: str, snapshot_dir: Path, config: ConfigDict):
        """Create the output directory if needed and return its path."""
        out_dir = self.out_dir(url=url, snapshot_dir=snapshot_dir, config=config)
        # Path.mkdir() returns None, so return the path itself for callers.
        out_dir.mkdir(exist_ok=True)
        return out_dir

    def should_extract(self, url: str, snapshot_dir: Path, config: ConfigDict):
        """Return True only if enabled, with an empty, writable output directory."""
        # Local import: `os` is not among this module's top-level imports.
        import os

        # return False if extractor is disabled
        if not self.is_enabled:
            return False

        out_dir = self.out_dir(url=url, snapshot_dir=snapshot_dir, config=config)

        # glob() returns a generator, which is always truthy; any() checks
        # whether it actually yields an entry.
        if any(out_dir.glob('*')):
            return False

        if not os.access(out_dir, os.W_OK | os.X_OK):
            return False

        return True

    def get_dependency_cmd(self, url: str, extractor_dir: Path, config: ConfigDict):
        """Build the flat argument list for the dependency from CMD, url, and ARGS."""
        # '{url}' appears in DEFAULT_CMD, so make it available to the templates.
        template_config = {**config, 'url': url}
        # NOTE(review): DEFAULT_CMD already contains '{ARGS}' and '{url}', so the
        # url and args may end up duplicated here — confirm the intended layout.
        return [
            *self.format_args(self.CMD, **template_config),
            url,
            *self.format_args(self.ARGS, **template_config),   # TODO: split and requote this properly
        ]

    # @requires_config('HOSTNAME', 'TIMEOUT', 'USER_AGENT', 'CHECK_SSL_VALIDITY')
    def extract(self, url: str, snapshot_dir: Path, config: ConfigDict):
        """Run the dependency for *url* and return an ArchiveResult.

        Returns None without doing anything when the extractor is disabled.
        Any exception raised while running is appended to the result's stderr
        and the status stays 'failed'.
        """
        if not self.ENABLED:
            return

        extractor_dir = self.create_out_dir(url=url, snapshot_dir=snapshot_dir, config=config)

        cmd = self.get_dependency_cmd(url=url, extractor_dir=extractor_dir, config=config)

        status, stdout, stderr, output_path = 'failed', '', '', None
        timer = None  # stays None if the dependency run raises before returning
        # NOTE(review): `ArchiveError`, `ArchiveResult` and `get_dir_size` are not
        # imported in the visible part of this module, and TIMEOUT is passed as
        # its raw template string — confirm.
        try:
            proc, timer, errors = self.dependency.run(cmd, cwd=extractor_dir, timeout=self.TIMEOUT)
            if errors is not None:
                # run() reports failures via `errors`; surface them in stderr below.
                raise errors
            stdout, stderr = proc.stdout, proc.stderr

            if 'ERROR: Unsupported URL' in stderr:
                hints = ('gallery-dl doesnt support this type of url yet',)
                raise ArchiveError('Failed to save gallerydl', hints)

            if proc.returncode == 0 and 'finished' in stdout:
                output_path = extractor_dir / 'index.html'
                status = 'succeeded'

        except Exception as err:
            # str(): concatenating an exception object to a str raises TypeError.
            stderr += str(err)

        num_bytes, num_dirs, num_files = get_dir_size(extractor_dir)

        return ArchiveResult(
            cmd=cmd,
            pwd=str(extractor_dir),
            cmd_version=self.dependency.bin_version,
            cmd_path=self.dependency.bin_path,
            # config is declared as a Dict, so use item access.
            cmd_hostname=config['HOSTNAME'],

            output_path=output_path,
            stdout=stdout,
            stderr=stderr,
            status=status,

            num_bytes=num_bytes,
            num_files=num_files,
            num_dirs=num_dirs,
            **(timer.stats if timer is not None else {}),
        )
# Concrete (non-abstract) extractor model that keeps every default inherited
# from ArchiveBoxBaseExtractor.
class ArchiveBoxDefaultExtractor(ArchiveBoxBaseExtractor, SingletonModel):
singleton_instance_id = 1
# Primary key defaults to the singleton id.
id = models.AutoField(default=singleton_instance_id, primary_key=True)
class Meta:
# Overrides the base class's abstract=True.
abstract = False
app_label = 'defaults'

View file

@ -0,0 +1,12 @@
from django.conf import settings
def register_plugin_settings(settings=settings, name='defaults'):
    """Append a plugin's static and template directories to the settings.

    Args:
        settings: settings object to update; must expose ``PACKAGE_DIR``,
            ``STATICFILES_DIRS`` and ``TEMPLATE_DIRS``.
        name: plugin directory name under ``<PACKAGE_DIR>/plugins/``.
    """
    # Local import: this module's only top-level import is django.conf.settings.
    from pathlib import Path

    # PACKAGE_DIR is not defined in this module; read it from the settings
    # object instead.
    # NOTE(review): assumes the settings module exposes PACKAGE_DIR (it uses
    # that name to build its own STATICFILES_DIRS) — confirm.
    plugin_dir = Path(settings.PACKAGE_DIR) / 'plugins' / name

    settings.STATICFILES_DIRS += [
        str(plugin_dir / 'static'),
    ]
    settings.TEMPLATE_DIRS += [
        str(plugin_dir / 'templates'),
    ]

    print('REGISTERED PLUGIN SETTINGS', name)

View file

@ -0,0 +1,8 @@
from django.contrib import admin
from solo.admin import SingletonModelAdmin
from .models import GalleryDLDependency, GalleryDLExtractor
# Expose both GalleryDL singleton config models in the Django admin, using the
# stock SingletonModelAdmin (no custom admin class).
admin.site.register(GalleryDLDependency, SingletonModelAdmin)
admin.site.register(GalleryDLExtractor, SingletonModelAdmin)

View file

@ -1,166 +1,93 @@
from django.db import models
from django.utils.functional import cached_property
from solo.models import SingletonModel
class GalleryDLDependency(SingletonModel):
GALLERYDL_ENABLED = models.BooleanField(default=True)
GALLERYDL_BINARY = models.CharField(max_length=255, default='gallery-dl')
# GALLERYDL_WORKERS = models.IntegerField(default='{NUM_CORES}')
from archivebox.plugins.defaults.models import (
ArchiveBoxDefaultDependency,
ArchiveBoxDefaultExtractor,
BashEnvironmentDependency,
PipEnvironmentDependency,
)
def __str__(self):
return "GalleryDL Dependency Configuration"
class GalleryDLDependency(ArchiveBoxDefaultDependency, SingletonModel):
NAME = 'GALLERYDL'
LABEL = "GalleryDL"
REQUIRED = False
class Meta:
verbose_name = "GalleryDL Dependency Configuration"
PARENT_DEPENDENCIES = [
BashEnvironmentDependency,
PipEnvironmentDependency,
]
@cached_property
def bin_path(self):
return bin_path(self.GALLERYDL_BINARY)
BIN_DEPENDENCIES = ['gallery-dl']
APT_DEPENDENCIES = []
BREW_DEPENDENCIES = []
PIP_PACKAGES = ['gallery-dl']
NPM_PACKAGES = []
@cached_property
def bin_version(self):
return bin_version(self.bin_path)
DEFAULT_BINARY = 'gallery-dl'
DEFAULT_START_CMD = None
DEFAULT_ARGS = []
VERSION_CMD = '{BINARY} --version'
@cached_property
def is_valid(self):
return self.bin_path and self.bin_version
ENABLED = models.BooleanField(default=True)
BINARY = models.CharField(max_length=255, default='gallery-dl')
@cached_property
def enabled(self):
return self.GALLERYDL_ENABLED and self.is_valid
WORKERS = models.IntegerField(default='1')
def run(args, pwd, timeout):
errors = None
timer = TimedProgress(timeout, prefix=' ')
try:
proc = run(cmd=[self.bin_path, *args]=True, pwd=pwd, timeout=timeout)run(cmd=[self.bin_path, *args]=True, pwd=pwd, timeout=timeout)
class GalleryDLExtractor(ArchiveBoxDefaultExtractor, SingletonModel):
NAME = 'GALLERYDL'
LABEL = 'gallery-dl'
except Exception as err:
errors = err
finally:
timer.end()
return proc, timer, errors
def pretty_version(self):
if self.enabled:
if self.is_valid:
color, symbol, note, version = 'green', '', 'valid', ''
parsed_version_num = re.search(r'[\d\.]+', self.bin_version)
if parsed_version_num:
version = f'v{parsed_version_num[0]}'
if not self.bin_version:
color, symbol, note, version = 'red', 'X', 'invalid', '?'
else:
color, symbol, note, version = 'lightyellow', '-', 'disabled', '-'
path = pretty_path(self.bin_path)
return ' '.join((
ANSI[color],
symbol,
ANSI['reset'],
name.ljust(21),
version.ljust(14),
ANSI[color],
note.ljust(8),
ANSI['reset'],
path.ljust(76),
))
class GalleryDLExtractor(SingletonModel):
GALLERYDL_EXTRACTOR_NAME = 'gallerydl'
SAVE_GALLERYDL = models.BooleanField(default=True)
GALLERYDL_DEPENDENCY = GalleryDLDependency.get_solo()
DEPENDENCY = GalleryDLDependency.get_solo()
# https://github.com/mikf/gallery-dl
GALLERYDL_ARGS = models.CSVField(max_length=255, default=[])
GALLERYDL_TIMEOUT = models.IntegerField(default=lambda c: c['TIMEOUT'])
GALLERYDL_USER_AGENT = models.CharField(max_length=255, default='{USER_AGENT}')
GALLERYDL_COOKIES_TXT = models.CharField(max_length=255, default='{COOKIES_TXT}')
DEFAULT_CMD = [
'{DEPENDENCY.BINARY}',
'{ARGS}'
'{url}',
]
DEFAULT_ARGS = [
'--timeout', self.TIMEOUT.format(**config),
'--cookies', self.COOKIES_TXT.format(**config),
'--user-agent', self.COOKIES_TXT.format(**config),
'--verify', self.CHECK_SSL_VALIDITY.format(**config),
]
ALIASES = {
'SAVE_GALLERYDL': ('USE_GALLERYDL', 'FETCH_GALLERYDL'),
}
ENABLED = models.BooleanField(default=True)
@cached_property
def enabled(self):
return self.SAVE_GALLERYDL and self.GALLERYDL_DEPENDENCY.is_valid
CMD = models.CharField(max_length=255, default=DEFAULT_CMD)
ARGS = models.CSVField(max_length=255, default=DEFAULT_ARGS)
TIMEOUT = models.CharField(max_length=255, default='{TIMEOUT}')
USER_AGENT = models.CharField(max_length=255, default='{USER_AGENT}')
COOKIES_TXT = models.CharField(max_length=255, default='{COOKIES_TXT}')
CHECK_SSL_VALIDITY = models.CharField(default='{CHECK_SSL_VALIDITY}')
def __str__(self):
return "GalleryDL Extractor Configuration"
class Meta:
verbose_name = "GalleryDL Extractor Configuration"
def __json__(self):
return {
'SAVE_GALLERYDL': self.SAVE_GALLERYDL,
'GALLERYDL_DEPENDENCY': self.GALLERYDL_DEPENDENCY.__json__(),
'GALLERYDL_ARGS': self.GALLERYDL_ARGS,
'GALLERYDL_TIMEOUT': self.GALLERYDL_TIMEOUT,
'GALLERYDL_USER_AGENT': self.GALLERYDL_USER_AGENT,
'GALLERYDL_COOKIES_TXT': self.GALLERYDL_COOKIES_TXT,
}
def validate(self):
assert 5 < self.GALLERYDL_TIMEOUT, 'GALLERYDL_TIMEOUT must be at least 5 seconds'
# assert Path(self.GALLERYDL_COOKIES_TXT).exists()
# TODO: validate user agent with uaparser
# TODO: validate args, cookies.txt?
def save(self, *args, **kwargs):
self.validate()
with transaction.atomic():
result = super().save(*args, **kwargs)
emit_event({'type': 'GalleryDLExtractor.save', 'diff': self.__json__(), 'kwargs': kwargs})
# potential consumers of this event:
# - event logger: write to events.log
# - config file updater: writes to ArchiveBox.conf
# - supervisor: restarts relevant dependencies/extractors
# - etc...
return result
def create_extractor_directory(self, parent_dir: Path):
return subdir = (parent_dir / self.GALLERYDL_EXTRACTOR_NAME).mkdir(exist_ok=True)
def should_extract(self, parent_dir: Path):
existing_files = (parent_dir / self.GALLERYDL_EXTRACTOR_NAME).glob('*')
return not existing_files
def extract(self, url: str, out_dir: Path):
if not self.enabled:
# @task
# @requires_config('HOSTNAME', 'TIMEOUT', 'USER_AGENT', 'CHECK_SSL_VALIDITY')
def extract(self, url: str, out_dir: Path, config: ConfigDict):
if not self.ENABLED:
return
extractor_dir = self.create_extractor_directory(out_dir)
cmd = [
self.GALLERYDL_DEPENDENCY.bin_path,
self.CMD,
url,
'--timeout', GALLERYDL_TIMEOUT,
'--cookies', GALLERYDL_COOKIES_TXT,
'--user-agent', GALLERYDL_USER_AGENT,
'--verify', config.CHECK_SSL_VALIDITY
*self.GALLERYDL_ARGS,
'--timeout', self.TIMEOUT.format(**config),
'--cookies', self.COOKIES_TXT.format(**config),
'--user-agent', self.COOKIES_TXT.format(**config),
'--verify', self.CHECK_SSL_VALIDITY.format(**config),
*split_args(self.ARGS.format(**config)),
]
status, stdout, stderr, output_path = 'failed', '', '', None
try:
proc, timer, errors = self.GALLERYDL_DEPENDENCY.run(cmd, cwd=extractor_dir, timeout=self.GALLERYDL_TIMEOUT)
proc, timer, errors = self.DEPENDENCY.run(cmd, cwd=extractor_dir, timeout=self.GALLERYDL_TIMEOUT)
stdout, stderr = proc.stdout, proc.stderr
if 'ERROR: Unsupported URL' in stderr:
@ -176,17 +103,16 @@ class GalleryDLExtractor(SingletonModel):
num_bytes, num_dirs, num_files = get_dir_size(extractor_dir)
return ArchiveResult(
status=status,
cmd=cmd,
pwd=str(out_dir),
cmd_version=self.GALLERYDL_DEPENDENCY.bin_version,
cmd_path=self.GALLERYDL_DEPENDENCY.bin_path,
cmd_version=self.DEPENDENCY.bin_version,
cmd_path=self.DEPENDENCY.bin_path,
cmd_hostname=config.HOSTNAME,
output_path=output_path,
stdout=stdout,
stderr=stderr,
status=status,
num_bytes=num_bytes,
num_files=num_files,

View file

@ -0,0 +1,59 @@
# Declarative description of the GalleryDL plugin: one dependency and one
# extractor, each with its config fields, defaults and task entry points.
dependencies:
  GalleryDLDependency:
    ID: gallerydl
    LABEL: GalleryDL
    REQUIRED: false

    PARENT_DEPENDENCIES:
      - BashEnvironmentDependency
      - PipEnvironmentDependency

    PIP_DEPENDENCIES:
      - gallery-dl

    # Field definitions; {DEFAULT_CONFIG.*} placeholders refer to the values below.
    USER_CONFIG:
      ENABLED: models.BooleanField(max_length=255, default={DEFAULT_CONFIG.ENABLED})
      BINARY: models.CharField(max_length=255, default={DEFAULT_CONFIG.BINARY})

    DEFAULT_CONFIG:
      ENABLED: true
      BINARY: 'gallery-dl'

    # Alternative config key -> canonical key.
    CONFIG_ALIASES:
      - SAVE_GALLERYDL: ENABLED
      - USE_GALLERYDL: ENABLED
      - GALLERYDL_ENABLED: ENABLED
      - GALLERYDL_BINARY: BINARY

    TASKS:
      # plugins.GalleryDLDependency
      run_dependency: plugins.gallerydl.models.GalleryDLDependency.run_dependency

extractors:
  GalleryDLExtractor:
    ID: GALLERYDL
    LABEL: GalleryDL
    ENABLED: true

    DEPENDENCY: GalleryDLDependency

    CONFIG:
      ENABLED: models.BooleanField(default={DEFAULT_CONFIG.ENABLED})
      CMD: models.CharField(max_length=255, default={DEFAULT_CONFIG.CMD})
      ARGS: models.CharField(max_length=255, default={DEFAULT_CONFIG.ARGS})
      USER_AGENT: models.CharField(max_length=255, default={DEFAULT_CONFIG.USER_AGENT})
      CHECK_SSL_VALIDITY: models.CharField(max_length=255, default={DEFAULT_CONFIG.CHECK_SSL_VALIDITY})

    DEFAULT_CONFIG:
      ENABLED: true
      CMD: gallery-dl {args} {url}
      ARGS: --user-agent={USER_AGENT} --check-ssl={CHECK_SSL_VALIDITY}
      # Quoted: a value that starts with "{" would otherwise be parsed as a
      # YAML flow mapping instead of a template string.
      CHECK_SSL_VALIDITY: '{CHECK_SSL_VALIDITY}'
      USER_AGENT: '{USER_AGENT}'

    TASKS:
      CREATE_OUT_DIR: plugins.gallerydl.tasks.create_out_dir
      SHOULD_EXTRACT: plugins.gallerydl.tasks.should_extract
      EXTRACT: plugins.gallerydl.tasks.extract

View file

@ -0,0 +1,34 @@
from django.contrib import admin
from solo.admin import SingletonModelAdmin
from plugins.defaults.admin import DependencyAdmin, ExtractorAdmin
from .models import (
BashEnvironmentDependency,
AptEnvironmentDependency,
BrewEnvironmentDependency,
PipEnvironmentDependency,
NPMEnvironmentDependency,
SQLiteDependency,
DjangoDependency,
ArchiveBoxDependency,
# ArchiveBoxDefaultExtractor,
)
# Debug trace printed once, when this admin module is imported.
print('DefaultsPluginConfig.admin')

# AdminSite.register() accepts an iterable of models, so every system
# dependency model is registered with DependencyAdmin in one call, in the same
# order as before.
admin.site.register(
    (
        BashEnvironmentDependency,
        AptEnvironmentDependency,
        BrewEnvironmentDependency,
        PipEnvironmentDependency,
        NPMEnvironmentDependency,
        SQLiteDependency,
        DjangoDependency,
        ArchiveBoxDependency,
    ),
    DependencyAdmin,
)
# admin.site.register(ArchiveBoxDefaultExtractor, ExtractorAdmin)

View file

@ -0,0 +1,21 @@
# __package__ = 'archivebox.plugins.system'
from django.apps import AppConfig
class SystemPluginConfig(AppConfig):
    """Django application config for the ``system`` plugin.

    On startup (``ready``) it registers the plugin's static and template
    directories with the project settings.
    """

    # `label` is the short machine name of the app; Django documents it as an
    # identifier-style name, and it has to agree with the
    # `app_label = 'system'` declared on this plugin's models.  The
    # human-readable title belongs in `verbose_name`.
    label = "system"
    verbose_name = "ArchiveBox System"
    # NOTE(review): INSTALLED_APPS lists this app as 'plugins.system';
    # confirm `name` should not be the full dotted path as well.
    name = "system"

    default_auto_field = "django.db.models.AutoField"

    def ready(self):
        """Register this plugin's static/template dirs once the app registry is ready."""
        print('plugins.system.apps.SystemPluginConfig.ready')

        # Imported lazily so that settings are only touched after Django has
        # finished configuring itself.
        from django.conf import settings

        from .settings import register_plugin_settings

        register_plugin_settings(settings, name=self.name)

View file

@ -0,0 +1,110 @@
# Generated by Django 3.1.14 on 2024-01-24 08:06
from django.db import migrations, models
# Initial schema migration for the system plugin: creates one table per
# system dependency model.  Every table has the same four columns
# (id, ENABLED, BINARY, ARGS); only the defaults and editability differ.
class Migration(migrations.Migration):
# First migration of this app.
initial = True
# Does not depend on any other migration.
dependencies = [
]
operations = [
# apt package manager.
migrations.CreateModel(
name='AptEnvironmentDependency',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('ENABLED', models.BooleanField(default=True)),
('BINARY', models.CharField(default='apt-get', max_length=255)),
('ARGS', models.CharField(default='-qq', max_length=255)),
],
options={
'abstract': False,
},
),
# ArchiveBox itself; all fields are non-editable.
migrations.CreateModel(
name='ArchiveBoxDependency',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('ENABLED', models.BooleanField(default=True, editable=False)),
('BINARY', models.CharField(default='archivebox', editable=False, max_length=255)),
# NOTE(review): CharField with a list default — confirm this is intended.
('ARGS', models.CharField(default=[], editable=False, max_length=255)),
],
options={
'abstract': False,
},
),
# bash shell; ENABLED is non-editable.
migrations.CreateModel(
name='BashEnvironmentDependency',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('ENABLED', models.BooleanField(default=True, editable=False)),
('BINARY', models.CharField(default='bash', max_length=255)),
('ARGS', models.CharField(default='-c', max_length=255)),
],
options={
'abstract': False,
},
),
# homebrew package manager.
migrations.CreateModel(
name='BrewEnvironmentDependency',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('ENABLED', models.BooleanField(default=True)),
('BINARY', models.CharField(default='brew', max_length=255)),
('ARGS', models.CharField(default='', max_length=255)),
],
options={
'abstract': False,
},
),
# Django; all fields are non-editable.
migrations.CreateModel(
name='DjangoDependency',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('ENABLED', models.BooleanField(default=True, editable=False)),
('BINARY', models.CharField(default='django-admin.py', editable=False, max_length=255)),
# NOTE(review): CharField with a list default — confirm this is intended.
('ARGS', models.CharField(default=[], editable=False, max_length=255)),
],
options={
'abstract': False,
},
),
# NodeJS / npm environment.
migrations.CreateModel(
name='NPMEnvironmentDependency',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('ENABLED', models.BooleanField(default=True)),
('BINARY', models.CharField(default='node', max_length=255)),
('ARGS', models.CharField(default='', max_length=255)),
],
options={
'abstract': False,
},
),
# pip environment.
migrations.CreateModel(
name='PipEnvironmentDependency',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('ENABLED', models.BooleanField(default=True)),
('BINARY', models.CharField(default='pip3', max_length=255)),
('ARGS', models.CharField(default='', max_length=255)),
],
options={
'abstract': False,
},
),
# SQLite; all fields are non-editable.
migrations.CreateModel(
name='SQLiteDependency',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('ENABLED', models.BooleanField(default=True, editable=False)),
('BINARY', models.CharField(default='sqlite3', editable=False, max_length=255)),
# NOTE(review): CharField with a list default — confirm this is intended.
('ARGS', models.CharField(default=[], editable=False, max_length=255)),
],
options={
'abstract': False,
},
),
]

View file

@ -0,0 +1,361 @@
# __package__ = 'archivebox.plugins.system'
import os
import shutil
import sys
import inspect
import django
import sqlite3
from pathlib import Path
from typing import List, Dict, Any
from django.db import models
from django.utils.functional import cached_property
from solo.models import SingletonModel
from plugins.defaults.models import ArchiveBoxBaseDependency, bin_path, bin_version
ConfigDict = Dict[str, Any]
# Concrete dependency model for the bash shell.  It is marked REQUIRED and has
# no parent dependencies.
class BashEnvironmentDependency(ArchiveBoxBaseDependency, SingletonModel):
singleton_instance_id = 1
id = models.AutoField(primary_key=True)
NAME = 'BASH'
LABEL = "Bash"
REQUIRED = True
PARENT_DEPENDENCIES = []
BIN_DEPENDENCIES = ['bash']
APT_DEPENDENCIES = []
BREW_DEPENDENCIES = []
PIP_DEPENDENCIES = []
NPM_DEPENDENCIES = []
DEFAULT_BINARY = 'bash'
DEFAULT_START_CMD = None
DEFAULT_STOP_CMD = None
DEFAULT_PID_FILE = None
DEFAULT_ARGS = '-c'
VERSION_CMD = '{BINARY} --version'
# REQUIRED is True, so ENABLED is not editable.
ENABLED = models.BooleanField(default=True, editable=not REQUIRED)
BINARY = models.CharField(max_length=255, default=DEFAULT_BINARY)
ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS)
# START_CMD = models.CharField(max_length=255, default=DEFAULT_START_CMD)
# WORKERS = models.IntegerField(default=1)
class Meta:
abstract = False
app_label = 'system'
# @task
# "Installing" here only verifies: asserts that this dependency is valid and
# that bin_path() returns a truthy value for every name in os_pkgs, then
# returns True.  Nothing is actually installed.
# NOTE(review): bin_path() falls back to returning its input unchanged, so the
# per-package assert may never fail for non-empty names — confirm.
def install_pkgs(self, os_pkgs=()):
assert self.is_valid, 'Bash environment is not available on this host'
for os_dependency in os_pkgs:
assert bin_path(os_dependency)
return True
class AptEnvironmentDependency(ArchiveBoxBaseDependency, SingletonModel):
    """Dependency model for the apt package manager (``apt-get``)."""

    singleton_instance_id = 1

    id = models.AutoField(primary_key=True)

    NAME = 'APT'
    LABEL = "apt"
    REQUIRED = False

    PARENT_DEPENDENCIES = [BashEnvironmentDependency]

    BIN_DEPENDENCIES = ['apt-get']
    APT_DEPENDENCIES = []
    BREW_DEPENDENCIES = []
    # The base class reads *_DEPENDENCIES; the *_PACKAGES names this class
    # originally declared are kept as aliases.
    PIP_DEPENDENCIES = []
    NPM_DEPENDENCIES = []
    PIP_PACKAGES = PIP_DEPENDENCIES
    NPM_PACKAGES = NPM_DEPENDENCIES

    DEFAULT_BINARY = 'apt-get'
    DEFAULT_START_CMD = None
    DEFAULT_STOP_CMD = None
    DEFAULT_PID_FILE = None
    DEFAULT_ARGS = '-qq'

    ENABLED = models.BooleanField(default=True, editable=not REQUIRED)
    BINARY = models.CharField(max_length=255, default=DEFAULT_BINARY)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS)

    class Meta:
        abstract = False
        app_label = 'system'

    # @task
    def install_pkgs(self, apt_pkgs=()):
        """Install *apt_pkgs* with apt-get after refreshing the package index.

        Returns True on success; with no packages requested nothing is run.

        Raises:
            AssertionError: if this dependency is not valid.
            subprocess.CalledProcessError: if apt-get exits non-zero.
        """
        # Local import: the module has no `run` helper in scope, so the
        # standard-library runner is used instead.
        import subprocess

        assert self.is_valid, 'Apt environment is not available on this host'

        if not apt_pkgs:
            # Nothing to install: do not invoke the package manager at all.
            return True

        subprocess.run([self.DEFAULT_BINARY, '-qq', 'update'], check=True)
        for apt_package in apt_pkgs:
            subprocess.run([self.DEFAULT_BINARY, 'install', '-y', apt_package], check=True)

        return True
class BrewEnvironmentDependency(ArchiveBoxBaseDependency, SingletonModel):
    """Dependency model for the homebrew package manager (``brew``)."""

    singleton_instance_id = 1

    id = models.AutoField(primary_key=True)

    NAME = 'BREW'
    LABEL = "homebrew"
    REQUIRED = False

    PARENT_DEPENDENCIES = [BashEnvironmentDependency]

    BIN_DEPENDENCIES = ['brew']
    APT_DEPENDENCIES = []
    BREW_DEPENDENCIES = []
    # The base class reads *_DEPENDENCIES; the *_PACKAGES names this class
    # originally declared are kept as aliases.
    PIP_DEPENDENCIES = []
    NPM_DEPENDENCIES = []
    PIP_PACKAGES = PIP_DEPENDENCIES
    NPM_PACKAGES = NPM_DEPENDENCIES

    DEFAULT_BINARY = 'brew'
    DEFAULT_START_CMD = None
    DEFAULT_STOP_CMD = None
    DEFAULT_PID_FILE = None
    DEFAULT_ARGS = ''

    ENABLED = models.BooleanField(default=True, editable=True)
    BINARY = models.CharField(max_length=255, default=DEFAULT_BINARY)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS)

    class Meta:
        abstract = False
        app_label = 'system'

    # @task
    def install_pkgs(self, brew_pkgs=()):
        """Install *brew_pkgs* with brew after running ``brew update``.

        Returns True on success; with no packages requested nothing is run.

        Raises:
            AssertionError: if this dependency is not valid.
            subprocess.CalledProcessError: if brew exits non-zero.
        """
        # Local import: the module has no `run` helper in scope, so the
        # standard-library runner is used instead.
        import subprocess

        assert self.is_valid, 'Brew environment is not available on this host'

        if not brew_pkgs:
            # Nothing to install: do not invoke the package manager at all.
            return True

        subprocess.run([self.DEFAULT_BINARY, 'update'], check=True)
        for brew_pkg in brew_pkgs:
            subprocess.run([self.DEFAULT_BINARY, 'install', brew_pkg], check=True)

        return True
class PipEnvironmentDependency(ArchiveBoxBaseDependency, SingletonModel):
    """Dependency model for the Python package installer (``pip3``)."""

    singleton_instance_id = 1

    id = models.AutoField(primary_key=True)

    NAME = 'PIP'
    LABEL = "pip"
    REQUIRED = False

    PARENT_DEPENDENCIES = [BashEnvironmentDependency]

    BIN_DEPENDENCIES = ['python3', 'pip3']
    APT_DEPENDENCIES = ['python3.11', 'pip3', 'pipx']
    BREW_DEPENDENCIES = ['python@3.11', 'pipx']
    # The base class reads *_DEPENDENCIES; the *_PACKAGES names this class
    # originally declared are kept as aliases.
    PIP_DEPENDENCIES = ['setuptools', 'pipx']
    NPM_DEPENDENCIES = []
    PIP_PACKAGES = PIP_DEPENDENCIES
    NPM_PACKAGES = NPM_DEPENDENCIES

    DEFAULT_BINARY = 'pip3'
    DEFAULT_START_CMD = None
    DEFAULT_STOP_CMD = None
    DEFAULT_PID_FILE = None
    DEFAULT_ARGS = ''
    VERSION_CMD = '{BINARY} --version'

    ENABLED = models.BooleanField(default=True, editable=True)
    BINARY = models.CharField(max_length=255, default=DEFAULT_BINARY)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS)

    class Meta:
        abstract = False
        app_label = 'system'

    # @task
    def install_pkgs(self, pip_pkgs=()):
        """Install or upgrade each package in *pip_pkgs* with pip.

        Returns True on success.

        Raises:
            AssertionError: if this dependency is not valid.
            subprocess.CalledProcessError: if pip exits non-zero.
        """
        # Local import: the module has no `run` helper in scope, so the
        # standard-library runner is used instead.
        import subprocess

        assert self.is_valid, 'Pip environment is not available on this host'

        for pip_pkg in pip_pkgs:
            # pip's option is --upgrade; it has no --update option.
            subprocess.run(
                [self.DEFAULT_BINARY, 'install', '--upgrade', '--ignore-installed', pip_pkg],
                check=True,
            )

        return True
class NPMEnvironmentDependency(ArchiveBoxBaseDependency, SingletonModel):
    """Dependency model for the NodeJS runtime and its ``npm`` installer."""

    singleton_instance_id = 1

    id = models.AutoField(primary_key=True)

    NAME = 'NODEJS'
    LABEL = "NodeJS"
    REQUIRED = False

    PARENT_DEPENDENCIES = [BashEnvironmentDependency]

    BIN_DEPENDENCIES = ['node', 'npm']
    APT_DEPENDENCIES = ['node', 'npm']
    BREW_DEPENDENCIES = ['node', 'npm']
    # The base class reads *_DEPENDENCIES; the *_PACKAGES names this class
    # originally declared are kept as aliases.
    PIP_DEPENDENCIES = []
    NPM_DEPENDENCIES = ['npm']
    PIP_PACKAGES = PIP_DEPENDENCIES
    NPM_PACKAGES = NPM_DEPENDENCIES

    DEFAULT_BINARY = 'node'
    DEFAULT_START_CMD = None
    DEFAULT_STOP_CMD = None
    DEFAULT_PID_FILE = None
    DEFAULT_ARGS = ''
    VERSION_CMD = '{BINARY} --version'

    ENABLED = models.BooleanField(default=True, editable=True)
    BINARY = models.CharField(max_length=255, default=DEFAULT_BINARY)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS)

    class Meta:
        abstract = False
        app_label = 'system'

    # @task
    def install_pkgs(self, npm_pkgs=()):
        """Install each package in *npm_pkgs* with npm.

        Returns True on success.

        Raises:
            AssertionError: if this dependency is not valid.
            subprocess.CalledProcessError: if npm exits non-zero.
        """
        # Local import: the module has no `run` helper in scope, so the
        # standard-library runner is used instead.
        import subprocess

        assert self.is_valid, 'NPM environment is not available on this host'

        # DEFAULT_BINARY is `node`, which does not install packages; use the
        # npm executable (listed in BIN_DEPENDENCIES) for that.
        npm_binary = bin_path('npm')
        for npm_pkg in npm_pkgs:
            subprocess.run([npm_binary, 'install', npm_pkg], check=True)

        return True
class DjangoDependency(ArchiveBoxBaseDependency, SingletonModel):
    """Dependency model for the Django package imported by this process.

    Path and version are taken from the imported ``django`` module rather than
    from a binary lookup.
    """

    singleton_instance_id = 1

    id = models.AutoField(primary_key=True)

    NAME = 'DJANGO'
    LABEL = "Django"
    REQUIRED = True

    PARENT_DEPENDENCIES = []

    BIN_DEPENDENCIES = ['django-admin.py']
    APT_DEPENDENCIES = []
    BREW_DEPENDENCIES = []
    # The base class reads *_DEPENDENCIES; the *_PACKAGES names this class
    # originally declared are kept as aliases.
    PIP_DEPENDENCIES = ['django==3.1.14']
    NPM_DEPENDENCIES = []
    PIP_PACKAGES = PIP_DEPENDENCIES
    NPM_PACKAGES = NPM_DEPENDENCIES

    DEFAULT_BINARY = 'django-admin.py'
    DEFAULT_START_CMD = 'archivebox server 0.0.0.0:8000'
    DEFAULT_PID_FILE = 'logs/{NAME}_WORKER.pid'
    DEFAULT_STOP_CMD = 'kill "$(<{PID_FILE})"'
    # NOTE(review): list default on a CharField; kept because the generated
    # migration records it — confirm the intended storage format.
    DEFAULT_ARGS = []
    VERSION_CMD = '{BINARY} --version'

    ENABLED = models.BooleanField(default=True, editable=False)
    BINARY = models.CharField(max_length=255, default=DEFAULT_BINARY, editable=False)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS, editable=False)

    class Meta:
        abstract = False
        app_label = 'system'

    @cached_property
    def bin_path(self):
        """Filesystem path of the imported ``django`` package."""
        return inspect.getfile(django)

    @cached_property
    def bin_version(self):
        """Django's version as a string, matching the string versions of other dependencies."""
        # django.VERSION is a tuple; get_version() gives the string form.
        return django.get_version()
class SQLiteDependency(ArchiveBoxBaseDependency, SingletonModel):
    """Dependency model for the SQLite library used through Python's ``sqlite3``.

    Path and version are taken from the imported ``sqlite3`` module rather than
    from a binary lookup.
    """

    singleton_instance_id = 1

    id = models.AutoField(primary_key=True)

    NAME = 'SQLITE'
    LABEL = "SQLite"
    REQUIRED = True

    PARENT_DEPENDENCIES = []

    BIN_DEPENDENCIES = []
    APT_DEPENDENCIES = []
    BREW_DEPENDENCIES = []
    # The base class reads *_DEPENDENCIES; the *_PACKAGES names this class
    # originally declared are kept as aliases.
    PIP_DEPENDENCIES = []
    NPM_DEPENDENCIES = []
    PIP_PACKAGES = PIP_DEPENDENCIES
    NPM_PACKAGES = NPM_DEPENDENCIES

    DEFAULT_BINARY = 'sqlite3'
    DEFAULT_START_CMD = None
    DEFAULT_STOP_CMD = None
    DEFAULT_PID_FILE = None
    # NOTE(review): list default on a CharField; kept because the generated
    # migration records it — confirm the intended storage format.
    DEFAULT_ARGS = []
    VERSION_CMD = 'python3 -c ""'

    ENABLED = models.BooleanField(default=True, editable=False)
    BINARY = models.CharField(max_length=255, default=DEFAULT_BINARY, editable=False)
    ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS, editable=False)

    class Meta:
        abstract = False
        app_label = 'system'

    @cached_property
    def bin_path(self):
        """Filesystem path of the imported ``sqlite3`` package."""
        return inspect.getfile(sqlite3)

    @cached_property
    def bin_version(self):
        """Version string of the SQLite library in use."""
        # sqlite3.version is the (deprecated) version of the Python adapter
        # module; sqlite3.sqlite_version is the SQLite library's own version.
        return sqlite3.sqlite_version
# Concrete dependency model for ArchiveBox itself.  It is marked REQUIRED and
# lists pip, Django and SQLite as parent dependencies.
class ArchiveBoxDependency(ArchiveBoxBaseDependency, SingletonModel):
singleton_instance_id = 1
id = models.AutoField(primary_key=True)
NAME = 'ARCHIVEBOX'
LABEL = "ArchiveBox"
REQUIRED = True
PARENT_DEPENDENCIES = [
PipEnvironmentDependency,
DjangoDependency,
SQLiteDependency,
]
BIN_DEPENDENCIES = ['archivebox']
APT_DEPENDENCIES = []
BREW_DEPENDENCIES = []
# NOTE(review): the base class defines PIP_DEPENDENCIES / NPM_DEPENDENCIES, not
# *_PACKAGES — confirm which names are meant to be read.
PIP_PACKAGES = ['archivebox']
NPM_PACKAGES = []
DEFAULT_BINARY = 'archivebox'
DEFAULT_START_CMD = '{BINARY} server 0.0.0.0:8000'
DEFAULT_ARGS = []
VERSION_CMD = 'archivebox --version'
# None of the stored fields can be edited.
ENABLED = models.BooleanField(default=True, editable=False)
BINARY = models.CharField(max_length=255, default=DEFAULT_BINARY, editable=False)
ARGS = models.CharField(max_length=255, default=DEFAULT_ARGS, editable=False)
class Meta:
abstract = False
app_label = 'system'
# Path of the running script (sys.argv[0]); only when that is empty does it
# fall back to looking up 'archivebox' with bin_path().
@cached_property
def bin_path(self):
return sys.argv[0] or bin_path('archivebox')
# Returns a hard-coded version string for now.
@cached_property
def bin_version(self):
# return config['VERSION']
return '0.7.3+editable'

View file

@ -0,0 +1,3 @@
from django.conf import settings
from plugins.defaults import register_plugin_settings