1
0
Fork 0
mirror of synced 2024-09-28 23:31:09 +12:00

use new mypy TypedDict to manage config typing

This commit is contained in:
Nick Sweeting 2019-04-24 11:36:14 -04:00
parent 461a8b0d71
commit 11fd436305
5 changed files with 240 additions and 93 deletions

View file

@ -19,6 +19,7 @@ youtube-dl = "*"
python-crontab = "*" python-crontab = "*"
croniter = "*" croniter = "*"
ipython = "*" ipython = "*"
mypy-extensions = "*"
[requires] [requires]
python_version = "3.7" python_version = "3.7"

11
Pipfile.lock generated
View file

@ -1,7 +1,7 @@
{ {
"_meta": { "_meta": {
"hash": { "hash": {
"sha256": "b0dd2536174ddcdc8fe711dd92b577f270c5a34cdb3bcb06cc70842358c80fe2" "sha256": "ad0264907c26fe4227c5c0b6376733422b4e506a9eef057e0a563428373046dd"
}, },
"pipfile-spec": 6, "pipfile-spec": 6,
"requires": { "requires": {
@ -100,6 +100,14 @@
], ],
"version": "==0.13.3" "version": "==0.13.3"
}, },
"mypy-extensions": {
"hashes": [
"sha256:37e0e956f41369209a3d5f34580150bcacfabaa57b33a15c0b25f4b5725e0812",
"sha256:b16cabe759f55e3409a7d231ebd2841378fb0c27a5d1994719e340e4f429ac3e"
],
"index": "pypi",
"version": "==0.4.1"
},
"parso": { "parso": {
"hashes": [ "hashes": [
"sha256:17cc2d7a945eb42c3569d4564cdf49bde221bc2b552af3eca9c1aad517dcdd33", "sha256:17cc2d7a945eb42c3569d4564cdf49bde221bc2b552af3eca9c1aad517dcdd33",
@ -306,6 +314,7 @@
"sha256:37e0e956f41369209a3d5f34580150bcacfabaa57b33a15c0b25f4b5725e0812", "sha256:37e0e956f41369209a3d5f34580150bcacfabaa57b33a15c0b25f4b5725e0812",
"sha256:b16cabe759f55e3409a7d231ebd2841378fb0c27a5d1994719e340e4f429ac3e" "sha256:b16cabe759f55e3409a7d231ebd2841378fb0c27a5d1994719e340e4f429ac3e"
], ],
"index": "pypi",
"version": "==0.4.1" "version": "==0.4.1"
}, },
"parso": { "parso": {

View file

@ -9,10 +9,17 @@ import getpass
import shutil import shutil
from hashlib import md5 from hashlib import md5
from typing import Any, Optional, Dict, Tuple from typing import Optional, Type, Tuple
from subprocess import run, PIPE, DEVNULL from subprocess import run, PIPE, DEVNULL
CONFIG_TYPE = Dict[str, Any] from .config_stubs import (
SimpleConfigValueDict,
ConfigValue,
ConfigDict,
ConfigDefaultValue,
ConfigDefaultDict,
)
# ****************************************************************************** # ******************************************************************************
# Documentation: https://github.com/pirate/ArchiveBox/wiki/Configuration # Documentation: https://github.com/pirate/ArchiveBox/wiki/Configuration
@ -22,13 +29,13 @@ CONFIG_TYPE = Dict[str, Any]
################################# User Config ################################## ################################# User Config ##################################
SHELL_CONFIG_DEFAULTS = { SHELL_CONFIG_DEFAULTS: ConfigDefaultDict = {
'IS_TTY': {'type': bool, 'default': lambda _: sys.stdout.isatty()}, 'IS_TTY': {'type': bool, 'default': lambda _: sys.stdout.isatty()},
'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']}, 'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']},
'SHOW_PROGRESS': {'type': bool, 'default': lambda c: c['IS_TTY']}, 'SHOW_PROGRESS': {'type': bool, 'default': lambda c: c['IS_TTY']},
} }
ARCHIVE_CONFIG_DEFAULTS = { ARCHIVE_CONFIG_DEFAULTS: ConfigDefaultDict = {
'OUTPUT_DIR': {'type': str, 'default': None}, 'OUTPUT_DIR': {'type': str, 'default': None},
'ONLY_NEW': {'type': bool, 'default': False}, 'ONLY_NEW': {'type': bool, 'default': False},
'TIMEOUT': {'type': int, 'default': 60}, 'TIMEOUT': {'type': int, 'default': 60},
@ -38,22 +45,22 @@ ARCHIVE_CONFIG_DEFAULTS = {
'URL_BLACKLIST': {'type': str, 'default': None}, 'URL_BLACKLIST': {'type': str, 'default': None},
} }
ARCHIVE_METHOD_TOGGLES_DEFAULTS = { ARCHIVE_METHOD_TOGGLES_DEFAULTS: ConfigDefaultDict = {
'SAVE_TITLE': {'type': bool, 'default': True, 'aliases': ('FETCH_TITLE',)}, 'SAVE_TITLE': {'type': bool, 'default': True, 'aliases': ('FETCH_TITLE',)},
'SAVE_FAVICON': {'type': bool, 'default': True, 'aliases': ('FETCH_FAVICON',)}, 'SAVE_FAVICON': {'type': bool, 'default': True, 'aliases': ('FETCH_FAVICON',)},
'SAVE_WGET': {'type': bool, 'default': True, 'aliases': ('FETCH_WGET',)}, 'SAVE_WGET': {'type': bool, 'default': True, 'aliases': ('FETCH_WGET',)},
'SAVE_WGET_REQUISITES': {'type': bool, 'default': True, 'aliases': ('FETCH_WGET_REQUISITES',)}, 'SAVE_WGET_REQUISITES': {'type': bool, 'default': True, 'aliases': ('FETCH_WGET_REQUISITES',)},
'SAVE_PDF': {'type': bool, 'default': True, 'aliases': ('FETCH_PDF',)}, 'SAVE_PDF': {'type': bool, 'default': True, 'aliases': ('FETCH_PDF',)},
'SAVE_SCREENSHOT': {'type': bool, 'default': True, 'aliases': ('FETCH_SCREENSHOT',)}, 'SAVE_SCREENSHOT': {'type': bool, 'default': True, 'aliases': ('FETCH_SCREENSHOT',)},
'SAVE_DOM': {'type': bool, 'default': True, 'aliases': ('FETCH_DOM',)}, 'SAVE_DOM': {'type': bool, 'default': True, 'aliases': ('FETCH_DOM',)},
'SAVE_WARC': {'type': bool, 'default': True, 'aliases': ('FETCH_WARC',)}, 'SAVE_WARC': {'type': bool, 'default': True, 'aliases': ('FETCH_WARC',)},
'SAVE_GIT': {'type': bool, 'default': True, 'aliases': ('FETCH_GIT',)}, 'SAVE_GIT': {'type': bool, 'default': True, 'aliases': ('FETCH_GIT',)},
'SAVE_MEDIA': {'type': bool, 'default': True, 'aliases': ('FETCH_MEDIA',)}, 'SAVE_MEDIA': {'type': bool, 'default': True, 'aliases': ('FETCH_MEDIA',)},
'SAVE_ARCHIVE_DOT_ORG': {'type': bool, 'default': True, 'aliases': ('SUBMIT_ARCHIVE_DOT_ORG',)}, 'SAVE_ARCHIVE_DOT_ORG': {'type': bool, 'default': True, 'aliases': ('SUBMIT_ARCHIVE_DOT_ORG',)},
} }
ARCHIVE_METHOD_OPTIONS_DEFAULTS = { ARCHIVE_METHOD_OPTIONS_DEFAULTS: ConfigDefaultDict = {
'RESOLUTION': {'type': str, 'default': '1440,2000'}, 'RESOLUTION': {'type': str, 'default': '1440,2000', 'aliases': ('SCREENSHOT_RESOLUTION',)},
'GIT_DOMAINS': {'type': str, 'default': 'github.com,bitbucket.org,gitlab.com'}, 'GIT_DOMAINS': {'type': str, 'default': 'github.com,bitbucket.org,gitlab.com'},
'CHECK_SSL_VALIDITY': {'type': bool, 'default': True}, 'CHECK_SSL_VALIDITY': {'type': bool, 'default': True},
@ -67,7 +74,7 @@ ARCHIVE_METHOD_OPTIONS_DEFAULTS = {
'CHROME_SANDBOX': {'type': bool, 'default': True}, 'CHROME_SANDBOX': {'type': bool, 'default': True},
} }
DEPENDENCY_CONFIG_DEFAULTS = { DEPENDENCY_CONFIG_DEFAULTS: ConfigDefaultDict = {
'USE_CURL': {'type': bool, 'default': True}, 'USE_CURL': {'type': bool, 'default': True},
'USE_WGET': {'type': bool, 'default': True}, 'USE_WGET': {'type': bool, 'default': True},
'USE_GIT': {'type': bool, 'default': True}, 'USE_GIT': {'type': bool, 'default': True},
@ -116,7 +123,7 @@ FAVICON_FILENAME = 'favicon.ico'
DERIVED_CONFIG_DEFAULTS = { DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
'TERM_WIDTH': {'default': lambda c: lambda: shutil.get_terminal_size((100, 10)).columns}, 'TERM_WIDTH': {'default': lambda c: lambda: shutil.get_terminal_size((100, 10)).columns},
'USER': {'default': lambda c: getpass.getuser() or os.getlogin()}, 'USER': {'default': lambda c: getpass.getuser() or os.getlogin()},
'ANSI': {'default': lambda c: DEFAULT_CLI_COLORS if c['USE_COLOR'] else {k: '' for k in DEFAULT_CLI_COLORS.keys()}}, 'ANSI': {'default': lambda c: DEFAULT_CLI_COLORS if c['USE_COLOR'] else {k: '' for k in DEFAULT_CLI_COLORS.keys()}},
@ -131,7 +138,7 @@ DERIVED_CONFIG_DEFAULTS = {
'SOURCES_DIR': {'default': lambda c: os.path.join(c['OUTPUT_DIR'], SOURCES_DIR_NAME)}, 'SOURCES_DIR': {'default': lambda c: os.path.join(c['OUTPUT_DIR'], SOURCES_DIR_NAME)},
'LOGS_DIR': {'default': lambda c: os.path.join(c['OUTPUT_DIR'], LOGS_DIR_NAME)}, 'LOGS_DIR': {'default': lambda c: os.path.join(c['OUTPUT_DIR'], LOGS_DIR_NAME)},
'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and os.path.abspath(os.path.expanduser(c['COOKIES_FILE']))}, 'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and os.path.abspath(os.path.expanduser(c['COOKIES_FILE']))},
'CHROME_USER_DATA_DIR': {'default': lambda c: c['CHROME_USER_DATA_DIR'] and os.path.abspath(os.path.expanduser(c['CHROME_USER_DATA_DIR']))}, 'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (os.path.abspath(os.path.expanduser(c['CHROME_USER_DATA_DIR'])) or None)},
'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'], re.IGNORECASE)}, 'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'], re.IGNORECASE)},
'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0]}, 'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0]},
@ -168,7 +175,6 @@ DERIVED_CONFIG_DEFAULTS = {
'USE_CHROME': {'default': lambda c: c['USE_CHROME'] and (c['SAVE_PDF'] or c['SAVE_SCREENSHOT'] or c['SAVE_DOM'])}, 'USE_CHROME': {'default': lambda c: c['USE_CHROME'] and (c['SAVE_PDF'] or c['SAVE_SCREENSHOT'] or c['SAVE_DOM'])},
'CHROME_BINARY': {'default': lambda c: c['CHROME_BINARY'] if c['CHROME_BINARY'] else find_chrome_binary()}, 'CHROME_BINARY': {'default': lambda c: c['CHROME_BINARY'] if c['CHROME_BINARY'] else find_chrome_binary()},
'CHROME_VERSION': {'default': lambda c: bin_version(c['CHROME_BINARY']) if c['USE_CHROME'] else None}, 'CHROME_VERSION': {'default': lambda c: bin_version(c['CHROME_BINARY']) if c['USE_CHROME'] else None},
'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (c['CHROME_USER_DATA_DIR'] or None)},
'SAVE_PDF': {'default': lambda c: c['USE_CHROME']}, 'SAVE_PDF': {'default': lambda c: c['USE_CHROME']},
'SAVE_SCREENSHOT': {'default': lambda c: c['USE_CHROME']}, 'SAVE_SCREENSHOT': {'default': lambda c: c['USE_CHROME']},
'SAVE_DOM': {'default': lambda c: c['USE_CHROME']}, 'SAVE_DOM': {'default': lambda c: c['USE_CHROME']},
@ -184,7 +190,12 @@ DERIVED_CONFIG_DEFAULTS = {
################################### Helpers #################################### ################################### Helpers ####################################
def get_config_val(key: str, default: Any=None, type=None, aliases: Optional[Tuple[str, ...]]=None, config: CONFIG_TYPE=None) -> Any: def load_config_val(key: str,
default: ConfigDefaultValue=None,
type: Optional[Type]=None,
aliases: Optional[Tuple[str, ...]]=None,
config: Optional[ConfigDict]=None) -> ConfigValue:
# check the canonical option name first, then check any older aliases # check the canonical option name first, then check any older aliases
possible_env_keys = (key, *(aliases or ())) possible_env_keys = (key, *(aliases or ()))
for key in possible_env_keys: for key in possible_env_keys:
@ -193,7 +204,8 @@ def get_config_val(key: str, default: Any=None, type=None, aliases: Optional[Tup
break break
if type is None or val is None: if type is None or val is None:
if hasattr(default, '__call__'): if callable(default):
assert isinstance(config, dict)
return default(config) return default(config)
return default return default
@ -218,16 +230,22 @@ def get_config_val(key: str, default: Any=None, type=None, aliases: Optional[Tup
raise Exception('Config values can only be str, bool, or int') raise Exception('Config values can only be str, bool, or int')
def load_config(defaults: dict, config: Optional[CONFIG_TYPE]=None) -> CONFIG_TYPE: def load_config(defaults: ConfigDefaultDict, config: Optional[ConfigDict]=None) -> ConfigDict:
config = {**(config or {})} extended_config: ConfigDict = config.copy() if config else {}
for key, default in defaults.items(): for key, default in defaults.items():
try: try:
config[key] = get_config_val(key, **default, config=config) extended_config[key] = load_config_val(
key,
default=default['default'],
type=default.get('type'),
aliases=default.get('aliases'),
config=extended_config,
)
except KeyboardInterrupt: except KeyboardInterrupt:
raise SystemExit(1) raise SystemExit(1)
except Exception as e: except Exception as e:
stderr() stderr()
stderr(f'[X] Error while loading configuration value: {key}', color='red', config=config) stderr(f'[X] Error while loading configuration value: {key}', color='red', config=extended_config)
stderr(' {}: {}'.format(e.__class__.__name__, e)) stderr(' {}: {}'.format(e.__class__.__name__, e))
stderr() stderr()
stderr(' Check your config for mistakes and try again (your archive data is unaffected).') stderr(' Check your config for mistakes and try again (your archive data is unaffected).')
@ -237,27 +255,27 @@ def load_config(defaults: dict, config: Optional[CONFIG_TYPE]=None) -> CONFIG_TY
stderr() stderr()
raise SystemExit(1) raise SystemExit(1)
return config return extended_config
def stderr(*args, color: Optional[str]=None, config: Optional[CONFIG_TYPE]=None) -> None: def stderr(*args, color: Optional[str]=None, config: Optional[ConfigDict]=None) -> None:
ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
if color: if color:
strs = (ansi[color], ' '.join(str(a) for a in args), ansi['reset'], '\n') strs = [ansi[color], ' '.join(str(a) for a in args), ansi['reset'], '\n']
else: else:
strs = (' '.join(str(a) for a in args), '\n') strs = [' '.join(str(a) for a in args), '\n']
sys.stderr.write(''.join(strs)) sys.stderr.write(''.join(strs))
def bin_version(binary: str) -> Optional[str]: def bin_version(binary: Optional[str]) -> Optional[str]:
"""check the presence and return valid version line of a specified binary""" """check the presence and return valid version line of a specified binary"""
binary = os.path.expanduser(binary) abspath = bin_path(binary)
try: if not abspath:
if not shutil.which(binary): return None
raise Exception
version_str = run([binary, "--version"], stdout=PIPE).stdout.strip().decode() try:
version_str = run([abspath, "--version"], stdout=PIPE).stdout.strip().decode()
# take first 3 columns of first line of version info # take first 3 columns of first line of version info
return ' '.join(version_str.split('\n')[0].strip().split()[:3]) return ' '.join(version_str.split('\n')[0].strip().split()[:3])
except Exception: except Exception:
@ -270,13 +288,19 @@ def bin_version(binary: str) -> Optional[str]:
# stderr() # stderr()
return None return None
def bin_hash(binary: str) -> Optional[str]: def bin_path(binary: Optional[str]) -> Optional[str]:
bin_path = binary and shutil.which(os.path.expanduser(binary)) if binary is None:
if not bin_path: return None
return shutil.which(os.path.expanduser(binary)) or binary
def bin_hash(binary: Optional[str]) -> Optional[str]:
abs_path = bin_path(binary)
if abs_path is None:
return None return None
file_hash = md5() file_hash = md5()
with io.open(bin_path, mode='rb') as f: with io.open(abs_path, mode='rb') as f:
for chunk in iter(lambda: f.read(io.DEFAULT_BUFFER_SIZE), b''): for chunk in iter(lambda: f.read(io.DEFAULT_BUFFER_SIZE), b''):
file_hash.update(chunk) file_hash.update(chunk)
@ -340,7 +364,7 @@ def wget_supports_compression(config):
] ]
return not run(cmd, stdout=DEVNULL, stderr=DEVNULL).returncode return not run(cmd, stdout=DEVNULL, stderr=DEVNULL).returncode
def get_code_locations(config: CONFIG_TYPE) -> Dict[str, CONFIG_TYPE]: def get_code_locations(config: ConfigDict) -> SimpleConfigValueDict:
return { return {
'REPO_DIR': { 'REPO_DIR': {
'path': os.path.abspath(config['REPO_DIR']), 'path': os.path.abspath(config['REPO_DIR']),
@ -364,21 +388,22 @@ def get_code_locations(config: CONFIG_TYPE) -> Dict[str, CONFIG_TYPE]:
}, },
} }
def get_config_locations(config: CONFIG_TYPE) -> Dict[str, CONFIG_TYPE]: def get_config_locations(config: ConfigDict) -> ConfigValue:
abspath = lambda path: None if path is None else os.path.abspath(path)
return { return {
'CHROME_USER_DATA_DIR': { 'CHROME_USER_DATA_DIR': {
'path': config['CHROME_USER_DATA_DIR'] and os.path.abspath(config['CHROME_USER_DATA_DIR']), 'path': abspath(config['CHROME_USER_DATA_DIR']),
'enabled': config['USE_CHROME'] and config['CHROME_USER_DATA_DIR'], 'enabled': config['USE_CHROME'] and config['CHROME_USER_DATA_DIR'],
'is_valid': os.path.exists(os.path.join(config['CHROME_USER_DATA_DIR'], 'Default')) if config['CHROME_USER_DATA_DIR'] else False, 'is_valid': False if config['CHROME_USER_DATA_DIR'] is None else os.path.exists(os.path.join(config['CHROME_USER_DATA_DIR'], 'Default')),
}, },
'COOKIES_FILE': { 'COOKIES_FILE': {
'path': config['COOKIES_FILE'] and os.path.abspath(config['COOKIES_FILE']), 'path': abspath(config['COOKIES_FILE']),
'enabled': config['USE_WGET'] and config['COOKIES_FILE'], 'enabled': config['USE_WGET'] and config['COOKIES_FILE'],
'is_valid': config['COOKIES_FILE'] and os.path.exists(config['COOKIES_FILE']), 'is_valid': False if config['COOKIES_FILE'] is None else os.path.exists(config['COOKIES_FILE']),
}, },
} }
def get_data_locations(config: CONFIG_TYPE) -> Dict[str, CONFIG_TYPE]: def get_data_locations(config: ConfigDict) -> ConfigValue:
return { return {
'OUTPUT_DIR': { 'OUTPUT_DIR': {
'path': os.path.abspath(config['OUTPUT_DIR']), 'path': os.path.abspath(config['OUTPUT_DIR']),
@ -400,54 +425,59 @@ def get_data_locations(config: CONFIG_TYPE) -> Dict[str, CONFIG_TYPE]:
'enabled': True, 'enabled': True,
'is_valid': os.path.exists(config['ARCHIVE_DIR']), 'is_valid': os.path.exists(config['ARCHIVE_DIR']),
}, },
'SQL_INDEX': {
'path': os.path.abspath(os.path.join(config['OUTPUT_DIR'], JSON_INDEX_FILENAME)),
'enabled': True,
'is_valid': os.path.exists(os.path.join(config['OUTPUT_DIR'], JSON_INDEX_FILENAME)),
},
} }
def get_dependency_info(config: CONFIG_TYPE) -> Dict[str, CONFIG_TYPE]: def get_dependency_info(config: ConfigDict) -> ConfigValue:
return { return {
'PYTHON_BINARY': { 'PYTHON_BINARY': {
'path': config['PYTHON_BINARY'], 'path': bin_path(config['PYTHON_BINARY']),
'version': config['PYTHON_VERSION'], 'version': config['PYTHON_VERSION'],
'hash': bin_hash(config['PYTHON_BINARY']), 'hash': bin_hash(config['PYTHON_BINARY']),
'enabled': True, 'enabled': True,
'is_valid': bool(config['DJANGO_VERSION']), 'is_valid': bool(config['DJANGO_VERSION']),
}, },
'DJANGO_BINARY': { 'DJANGO_BINARY': {
'path': config['DJANGO_BINARY'], 'path': bin_path(config['DJANGO_BINARY']),
'version': config['DJANGO_VERSION'], 'version': config['DJANGO_VERSION'],
'hash': bin_hash(config['DJANGO_BINARY']), 'hash': bin_hash(config['DJANGO_BINARY']),
'enabled': True, 'enabled': True,
'is_valid': bool(config['DJANGO_VERSION']), 'is_valid': bool(config['DJANGO_VERSION']),
}, },
'CURL_BINARY': { 'CURL_BINARY': {
'path': (config['CURL_BINARY'] and shutil.which(config['CURL_BINARY'])) or config['CURL_BINARY'], 'path': bin_path(config['CURL_BINARY']),
'version': config['CURL_VERSION'], 'version': config['CURL_VERSION'],
'hash': bin_hash(config['PYTHON_BINARY']), 'hash': bin_hash(config['PYTHON_BINARY']),
'enabled': config['USE_CURL'], 'enabled': config['USE_CURL'],
'is_valid': bool(config['CURL_VERSION']), 'is_valid': bool(config['CURL_VERSION']),
}, },
'WGET_BINARY': { 'WGET_BINARY': {
'path': (config['WGET_BINARY'] and shutil.which(config['WGET_BINARY'])) or config['WGET_BINARY'], 'path': bin_path(config['WGET_BINARY']),
'version': config['WGET_VERSION'], 'version': config['WGET_VERSION'],
'hash': bin_hash(config['WGET_BINARY']), 'hash': bin_hash(config['WGET_BINARY']),
'enabled': config['USE_WGET'], 'enabled': config['USE_WGET'],
'is_valid': bool(config['WGET_VERSION']), 'is_valid': bool(config['WGET_VERSION']),
}, },
'GIT_BINARY': { 'GIT_BINARY': {
'path': (config['GIT_BINARY'] and shutil.which(config['GIT_BINARY'])) or config['GIT_BINARY'], 'path': bin_path(config['GIT_BINARY']),
'version': config['GIT_VERSION'], 'version': config['GIT_VERSION'],
'hash': bin_hash(config['GIT_BINARY']), 'hash': bin_hash(config['GIT_BINARY']),
'enabled': config['USE_GIT'], 'enabled': config['USE_GIT'],
'is_valid': bool(config['GIT_VERSION']), 'is_valid': bool(config['GIT_VERSION']),
}, },
'YOUTUBEDL_BINARY': { 'YOUTUBEDL_BINARY': {
'path': (config['YOUTUBEDL_BINARY'] and shutil.which(config['YOUTUBEDL_BINARY'])) or config['YOUTUBEDL_BINARY'], 'path': bin_path(config['YOUTUBEDL_BINARY']),
'version': config['YOUTUBEDL_VERSION'], 'version': config['YOUTUBEDL_VERSION'],
'hash': bin_hash(config['YOUTUBEDL_BINARY']), 'hash': bin_hash(config['YOUTUBEDL_BINARY']),
'enabled': config['USE_YOUTUBEDL'], 'enabled': config['USE_YOUTUBEDL'],
'is_valid': bool(config['YOUTUBEDL_VERSION']), 'is_valid': bool(config['YOUTUBEDL_VERSION']),
}, },
'CHROME_BINARY': { 'CHROME_BINARY': {
'path': (config['CHROME_BINARY'] and shutil.which(config['CHROME_BINARY'])) or config['CHROME_BINARY'], 'path': bin_path(config['CHROME_BINARY']),
'version': config['CHROME_VERSION'], 'version': config['CHROME_VERSION'],
'hash': bin_hash(config['CHROME_BINARY']), 'hash': bin_hash(config['CHROME_BINARY']),
'enabled': config['USE_CHROME'], 'enabled': config['USE_CHROME'],
@ -455,7 +485,7 @@ def get_dependency_info(config: CONFIG_TYPE) -> Dict[str, CONFIG_TYPE]:
}, },
} }
def get_chrome_info(config: CONFIG_TYPE) -> Dict[str, CONFIG_TYPE]: def get_chrome_info(config: ConfigDict) -> ConfigValue:
return { return {
'TIMEOUT': config['TIMEOUT'], 'TIMEOUT': config['TIMEOUT'],
'RESOLUTION': config['RESOLUTION'], 'RESOLUTION': config['RESOLUTION'],
@ -470,6 +500,8 @@ def get_chrome_info(config: CONFIG_TYPE) -> Dict[str, CONFIG_TYPE]:
################################## Load Config ################################# ################################## Load Config #################################
CONFIG: ConfigDict
CONFIG = load_config(SHELL_CONFIG_DEFAULTS) CONFIG = load_config(SHELL_CONFIG_DEFAULTS)
CONFIG = load_config(ARCHIVE_CONFIG_DEFAULTS, CONFIG) CONFIG = load_config(ARCHIVE_CONFIG_DEFAULTS, CONFIG)
CONFIG = load_config(ARCHIVE_METHOD_TOGGLES_DEFAULTS, CONFIG) CONFIG = load_config(ARCHIVE_METHOD_TOGGLES_DEFAULTS, CONFIG)
@ -480,9 +512,7 @@ globals().update(CONFIG)
############################## Importable Checkers ############################# ############################## Importable Checkers #############################
def check_system_config(config: CONFIG_TYPE=CONFIG) -> None: def check_system_config(config: ConfigDict=CONFIG) -> None:
ANSI = config['ANSI']
### Check system environment ### Check system environment
if config['USER'] == 'root': if config['USER'] == 'root':
stderr('[!] ArchiveBox should never be run as root!', color='red') stderr('[!] ArchiveBox should never be run as root!', color='red')
@ -507,19 +537,20 @@ def check_system_config(config: CONFIG_TYPE=CONFIG) -> None:
# stderr('[i] Using Chrome binary: {}'.format(shutil.which(CHROME_BINARY) or CHROME_BINARY)) # stderr('[i] Using Chrome binary: {}'.format(shutil.which(CHROME_BINARY) or CHROME_BINARY))
# stderr('[i] Using Chrome data dir: {}'.format(os.path.abspath(CHROME_USER_DATA_DIR))) # stderr('[i] Using Chrome data dir: {}'.format(os.path.abspath(CHROME_USER_DATA_DIR)))
if config['CHROME_USER_DATA_DIR'] and not os.path.exists(os.path.join(config['CHROME_USER_DATA_DIR'], 'Default')): if config['CHROME_USER_DATA_DIR'] is not None:
stderr('[X] Could not find profile "Default" in CHROME_USER_DATA_DIR.', color='red') if not os.path.exists(os.path.join(config['CHROME_USER_DATA_DIR'], 'Default')):
stderr(f' {config["CHROME_USER_DATA_DIR"]}') stderr('[X] Could not find profile "Default" in CHROME_USER_DATA_DIR.', color='red')
stderr(' Make sure you set it to a Chrome user data directory containing a Default profile folder.') stderr(f' {config["CHROME_USER_DATA_DIR"]}')
stderr(' For more info see:') stderr(' Make sure you set it to a Chrome user data directory containing a Default profile folder.')
stderr(' https://github.com/pirate/ArchiveBox/wiki/Configuration#CHROME_USER_DATA_DIR') stderr(' For more info see:')
if 'Default' in config['CHROME_USER_DATA_DIR']: stderr(' https://github.com/pirate/ArchiveBox/wiki/Configuration#CHROME_USER_DATA_DIR')
stderr() if 'Default' in config['CHROME_USER_DATA_DIR']:
stderr(' Try removing /Default from the end e.g.:') stderr()
stderr(' CHROME_USER_DATA_DIR="{}"'.format(config['CHROME_USER_DATA_DIR'].split('/Default')[0])) stderr(' Try removing /Default from the end e.g.:')
raise SystemExit(1) stderr(' CHROME_USER_DATA_DIR="{}"'.format(config['CHROME_USER_DATA_DIR'].split('/Default')[0]))
raise SystemExit(1)
def check_dependencies(config: CONFIG_TYPE=CONFIG, show_help: bool=True) -> None: def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
invalid = [ invalid = [
'{}: {} ({})'.format(name, info['path'] or 'unable to find binary', info['version'] or 'unable to detect version') '{}: {} ({})'.format(name, info['path'] or 'unable to find binary', info['version'] or 'unable to detect version')
for name, info in config['DEPENDENCIES'].items() for name, info in config['DEPENDENCIES'].items()
@ -564,12 +595,14 @@ def check_dependencies(config: CONFIG_TYPE=CONFIG, show_help: bool=True) -> None
stderr(' https://github.com/pirate/ArchiveBox/wiki/Configuration#save_media') stderr(' https://github.com/pirate/ArchiveBox/wiki/Configuration#save_media')
def check_data_folder(out_dir: Optional[str]=None, config: CONFIG_TYPE=CONFIG) -> None: def check_data_folder(out_dir: Optional[str]=None, config: ConfigDict=CONFIG) -> None:
out_dir = out_dir or config['OUTPUT_DIR'] output_dir = out_dir or config['OUTPUT_DIR']
json_index_exists = os.path.exists(os.path.join(out_dir, JSON_INDEX_FILENAME)) assert isinstance(output_dir, str)
json_index_exists = os.path.exists(os.path.join(output_dir, JSON_INDEX_FILENAME))
if not json_index_exists: if not json_index_exists:
stderr('[X] No archive index was found in current directory.', color='red') stderr('[X] No archive index was found in current directory.', color='red')
stderr(f' {out_dir}') stderr(f' {output_dir}')
stderr() stderr()
stderr(' Are you running archivebox in the right folder?') stderr(' Are you running archivebox in the right folder?')
stderr(' cd path/to/your/archive/folder') stderr(' cd path/to/your/archive/folder')
@ -579,7 +612,7 @@ def check_data_folder(out_dir: Optional[str]=None, config: CONFIG_TYPE=CONFIG) -
stderr(' archivebox init') stderr(' archivebox init')
raise SystemExit(1) raise SystemExit(1)
sql_index_exists = os.path.exists(os.path.join(out_dir, SQL_INDEX_FILENAME)) sql_index_exists = os.path.exists(os.path.join(output_dir, SQL_INDEX_FILENAME))
from .storage.sql import list_migrations from .storage.sql import list_migrations
pending_migrations = [name for status, name in list_migrations() if not status] pending_migrations = [name for status, name in list_migrations() if not status]
@ -591,7 +624,7 @@ def check_data_folder(out_dir: Optional[str]=None, config: CONFIG_TYPE=CONFIG) -
pending_operation = 'generate the new SQL main index' pending_operation = 'generate the new SQL main index'
stderr('[X] This collection was created with an older version of ArchiveBox and must be upgraded first.', color='lightyellow') stderr('[X] This collection was created with an older version of ArchiveBox and must be upgraded first.', color='lightyellow')
stderr(f' {out_dir}') stderr(f' {output_dir}')
stderr() stderr()
stderr(f' To upgrade it to the latest version and {pending_operation} run:') stderr(f' To upgrade it to the latest version and {pending_operation} run:')
stderr(' archivebox init') stderr(' archivebox init')
@ -599,26 +632,21 @@ def check_data_folder(out_dir: Optional[str]=None, config: CONFIG_TYPE=CONFIG) -
def setup_django(out_dir: str=None, check_db=False, config: CONFIG_TYPE=CONFIG) -> None: def setup_django(out_dir: str=None, check_db=False, config: ConfigDict=CONFIG) -> None:
output_dir = out_dir or config['OUTPUT_DIR']
assert isinstance(output_dir, str) and isinstance(config['PYTHON_DIR'], str)
import django import django
sys.path.append(config['PYTHON_DIR']) sys.path.append(config['PYTHON_DIR'])
os.environ.setdefault('OUTPUT_DIR', out_dir or config['OUTPUT_DIR']) os.environ.setdefault('OUTPUT_DIR', output_dir)
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings') os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
django.setup() django.setup()
if check_db: if check_db:
sql_index_path = os.path.join(out_dir or config['OUTPUT_DIR'], SQL_INDEX_FILENAME) sql_index_path = os.path.join(output_dir, SQL_INDEX_FILENAME)
assert os.path.exists(sql_index_path), ( assert os.path.exists(sql_index_path), (
f'No database file {SQL_INDEX_FILENAME} found in OUTPUT_DIR: {config["OUTPUT_DIR"]}') f'No database file {SQL_INDEX_FILENAME} found in OUTPUT_DIR: {config["OUTPUT_DIR"]}')
check_system_config() check_system_config()
__all__ = (
'stderr',
'check_data_folder',
'check_dependencies',
'setup_django',
*CONFIG,
)

View file

@ -0,0 +1,108 @@
from typing import Optional, Dict, Union, Tuple, Callable, Pattern, Type, Any
from mypy_extensions import TypedDict
# One fully-resolved config value: a scalar, None, a compiled regex, or a
# free-form nested dict.
SimpleConfigValue = Union[
    str,
    bool,
    int,
    None,
    Pattern,
    Dict[str, Any],
]

# Mapping of string keys to resolved values.
SimpleConfigValueDict = Dict[
    str,
    SimpleConfigValue,
]

# Zero-argument callable that produces a resolved value when invoked.
SimpleConfigValueGetter = Callable[
    [],
    SimpleConfigValue,
]

# Everything a config entry may hold: a plain value, a dict of values,
# or a getter that is called later.
ConfigValue = Union[
    SimpleConfigValue,
    SimpleConfigValueDict,
    SimpleConfigValueGetter,
]
class BaseConfig(TypedDict):
    """Empty TypedDict root; it declares no keys of its own."""
class ConfigDict(BaseConfig, total=False):
    """Static shape of the loaded configuration dictionary.

    Declared with ``total=False``, so every key is optional as far as the
    type checker is concerned; a partially-built config is still a valid
    ``ConfigDict``.
    """

    # Terminal / shell behaviour
    IS_TTY: bool
    USE_COLOR: bool
    SHOW_PROGRESS: bool

    # General archive options
    OUTPUT_DIR: str
    ONLY_NEW: bool
    TIMEOUT: int
    MEDIA_TIMEOUT: int
    OUTPUT_PERMISSIONS: str
    FOOTER_INFO: str
    URL_BLACKLIST: Optional[str]

    # Per-method on/off toggles
    SAVE_TITLE: bool
    SAVE_FAVICON: bool
    SAVE_WGET: bool
    SAVE_WGET_REQUISITES: bool
    SAVE_PDF: bool
    SAVE_SCREENSHOT: bool
    SAVE_DOM: bool
    SAVE_WARC: bool
    SAVE_GIT: bool
    SAVE_MEDIA: bool
    SAVE_ARCHIVE_DOT_ORG: bool

    # Per-method options
    RESOLUTION: str
    GIT_DOMAINS: str
    CHECK_SSL_VALIDITY: bool
    WGET_USER_AGENT: str
    CHROME_USER_AGENT: str
    COOKIES_FILE: Optional[str]
    CHROME_USER_DATA_DIR: Optional[str]
    CHROME_HEADLESS: bool
    CHROME_SANDBOX: bool

    # Dependency switches and binary names/paths
    USE_CURL: bool
    USE_WGET: bool
    USE_GIT: bool
    USE_CHROME: bool
    USE_YOUTUBEDL: bool
    CURL_BINARY: Optional[str]
    GIT_BINARY: Optional[str]
    WGET_BINARY: Optional[str]
    YOUTUBEDL_BINARY: Optional[str]
    CHROME_BINARY: Optional[str]

    # Runtime environment
    # TERM_WIDTH holds a callable, not a number: call it to get the width.
    TERM_WIDTH: Callable[[], int]
    USER: str
    ANSI: Dict[str, str]

    # Directories
    REPO_DIR: str
    PYTHON_DIR: str
    LEGACY_DIR: str
    TEMPLATES_DIR: str
    ARCHIVE_DIR: str
    SOURCES_DIR: str
    LOGS_DIR: str

    URL_BLACKLIST_PTN: Optional[Pattern]
    WGET_AUTO_COMPRESSION: bool

    # Version / binary information
    ARCHIVEBOX_BINARY: str
    VERSION: str
    GIT_SHA: str
    PYTHON_BINARY: str
    PYTHON_ENCODING: str
    PYTHON_VERSION: str
    DJANGO_BINARY: str
    DJANGO_VERSION: str
    # NOTE(review): the four *_VERSION keys below are presumably filled by the
    # same version probe as CHROME_VERSION and may therefore also be None --
    # confirm against the config defaults before widening them too.
    CURL_VERSION: str
    WGET_VERSION: str
    YOUTUBEDL_VERSION: str
    GIT_VERSION: str
    # None when Chrome is disabled or its version cannot be detected.
    CHROME_VERSION: Optional[str]

    # Aggregated info dictionaries
    DEPENDENCIES: Dict[str, SimpleConfigValueDict]
    CODE_LOCATIONS: Dict[str, SimpleConfigValueDict]
    CONFIG_LOCATIONS: Dict[str, SimpleConfigValueDict]
    DATA_LOCATIONS: Dict[str, SimpleConfigValueDict]
    CHROME_OPTIONS: Dict[str, SimpleConfigValue]
# Callable that computes a value from the config loaded so far.
ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]

# A default is either a literal value or a getter evaluated against the config.
ConfigDefaultValue = Union[ConfigValue, ConfigDefaultValueGetter]

# Schema of one entry in a *_CONFIG_DEFAULTS table; total=False makes every
# key optional.
ConfigDefault = TypedDict(
    'ConfigDefault',
    {
        'default': ConfigDefaultValue,
        'type': Optional[Type],
        'aliases': Optional[Tuple[str, ...]],
    },
    total=False,
)

# Option name -> its default specification.
ConfigDefaultDict = Dict[str, ConfigDefault]

View file

@ -37,10 +37,11 @@ setuptools.setup(
python_requires='>=3.6', python_requires='>=3.6',
install_requires=[ install_requires=[
"dataclasses==0.6", "dataclasses==0.6",
"mypy-extensions==0.4.1",
"base32-crockford==0.3.0", "base32-crockford==0.3.0",
"django==2.2", "django==2.2",
"django-extensions==2.1.6", "django-extensions==2.1.6",
"python-crontab", "python-crontab==2.3.6",
"youtube-dl", "youtube-dl",
"ipython", "ipython",