
Merge pull request #866 from ajgon/feat/reverse-proxy-auth

commit 0487cb9733
Nick Sweeting, 2023-01-09 18:22:46 -08:00 (committed by GitHub)
Signature: no known key found for this signature in database (GPG key ID: 4AEE18F83AFDEB23)
3 changed files with 61 additions and 30 deletions

archivebox/config.py

@@ -89,18 +89,23 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
    },
    'SERVER_CONFIG': {
        'SECRET_KEY': {'type': str, 'default': None},
        'BIND_ADDR': {'type': str, 'default': lambda c: ['127.0.0.1:8000', '0.0.0.0:8000'][c['IN_DOCKER']]},
        'ALLOWED_HOSTS': {'type': str, 'default': '*'},
        'DEBUG': {'type': bool, 'default': False},
        'PUBLIC_INDEX': {'type': bool, 'default': True},
        'PUBLIC_SNAPSHOTS': {'type': bool, 'default': True},
        'PUBLIC_ADD_VIEW': {'type': bool, 'default': False},
        'FOOTER_INFO': {'type': str, 'default': 'Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.'},
        'SNAPSHOTS_PER_PAGE': {'type': int, 'default': 40},
        'CUSTOM_TEMPLATES_DIR': {'type': str, 'default': None},
+       'TIME_ZONE': {'type': str, 'default': 'UTC'},
        'TIMEZONE': {'type': str, 'default': 'UTC'},
+       'REVERSE_PROXY_USER_HEADER': {'type': str, 'default': 'Remote-User'},
+       'REVERSE_PROXY_WHITELIST': {'type': str, 'default': ''},
+       'LOGOUT_REDIRECT_URL': {'type': str, 'default': '/'},
        'PREVIEW_ORIGINALS': {'type': bool, 'default': True},
-       'LOGOUT_REDIRECT_URL': {'type': str, 'default': '/'},
    },
    'ARCHIVE_METHOD_TOGGLES': {
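
Note on the three new SERVER_CONFIG keys above: like every other entry in CONFIG_SCHEMA, they are resolved from the environment first, then from ArchiveBox.conf, then from the schema default shown here. The sketch below only illustrates that precedence; it is not the project's actual load_config() implementation, and the example header/CIDR values in the comments are hypothetical.

import os

# Simplified stand-in for the schema defaults added in this hunk
SCHEMA_DEFAULTS = {
    'REVERSE_PROXY_USER_HEADER': 'Remote-User',
    'REVERSE_PROXY_WHITELIST': '',   # empty string leaves reverse-proxy auth disabled
    'LOGOUT_REDIRECT_URL': '/',
}

def resolve_option(key: str, config_file_vars: dict) -> str:
    # precedence: environment variable > ArchiveBox.conf value > schema default
    return os.environ.get(key, config_file_vars.get(key, SCHEMA_DEFAULTS[key]))

# e.g. a deployment behind an SSO proxy might export (hypothetical values):
#   REVERSE_PROXY_USER_HEADER=X-Authentik-Username
#   REVERSE_PROXY_WHITELIST=172.16.0.0/12,10.0.0.0/8
print(resolve_option('REVERSE_PROXY_WHITELIST', config_file_vars={}))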
@@ -161,7 +166,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
            '--add-metadata',
            '--max-filesize={}'.format(c['MEDIA_MAX_SIZE']),
        ]},
        'WGET_ARGS': {'type': list, 'default': ['--no-verbose',
            '--adjust-extension',
@@ -204,7 +209,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
        'USE_NODE': {'type': bool, 'default': True},
        'USE_YOUTUBEDL': {'type': bool, 'default': True},
        'USE_RIPGREP': {'type': bool, 'default': True},

        'CURL_BINARY': {'type': str, 'default': 'curl'},
        'GIT_BINARY': {'type': str, 'default': 'git'},
        'WGET_BINARY': {'type': str, 'default': 'wget'},
@@ -286,7 +291,7 @@ STATICFILE_EXTENSIONS = {
    # that can be downloaded as-is, not html pages that need to be rendered
    'gif', 'jpeg', 'jpg', 'png', 'tif', 'tiff', 'wbmp', 'ico', 'jng', 'bmp',
    'svg', 'svgz', 'webp', 'ps', 'eps', 'ai',
    'mp3', 'mp4', 'm4a', 'mpeg', 'mpg', 'mkv', 'mov', 'webm', 'm4v',
    'flv', 'wmv', 'avi', 'ogg', 'ts', 'm3u8',
    'pdf', 'txt', 'rtf', 'rtfd', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx',
    'atom', 'rss', 'css', 'js', 'json',
@@ -295,7 +300,7 @@ STATICFILE_EXTENSIONS = {
    # Less common extensions to consider adding later
    # jar, swf, bin, com, exe, dll, deb
    # ear, hqx, eot, wmlc, kml, kmz, cco, jardiff, jnlp, run, msi, msp, msm,
    # pl pm, prc pdb, rar, rpm, sea, sit, tcl tk, der, pem, crt, xpi, xspf,
    # ra, mng, asx, asf, 3gpp, 3gp, mid, midi, kar, jad, wml, htc, mml
@@ -423,14 +428,14 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
    'CHROME_BINARY': {'default': lambda c: c['CHROME_BINARY'] or find_chrome_binary()},
    'USE_CHROME': {'default': lambda c: c['USE_CHROME'] and c['CHROME_BINARY'] and (c['SAVE_PDF'] or c['SAVE_SCREENSHOT'] or c['SAVE_DOM'] or c['SAVE_SINGLEFILE'])},
    'CHROME_VERSION': {'default': lambda c: bin_version(c['CHROME_BINARY']) if c['USE_CHROME'] else None},

    'SAVE_PDF': {'default': lambda c: c['USE_CHROME'] and c['SAVE_PDF']},
    'SAVE_SCREENSHOT': {'default': lambda c: c['USE_CHROME'] and c['SAVE_SCREENSHOT']},
    'SAVE_DOM': {'default': lambda c: c['USE_CHROME'] and c['SAVE_DOM']},
    'SAVE_SINGLEFILE': {'default': lambda c: c['USE_CHROME'] and c['SAVE_SINGLEFILE'] and c['USE_NODE']},
    'SAVE_READABILITY': {'default': lambda c: c['USE_READABILITY'] and c['USE_NODE']},
    'SAVE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['USE_NODE']},

    'USE_NODE': {'default': lambda c: c['USE_NODE'] and (c['SAVE_READABILITY'] or c['SAVE_SINGLEFILE'] or c['SAVE_MERCURY'])},
    'NODE_VERSION': {'default': lambda c: bin_version(c['NODE_BINARY']) if c['USE_NODE'] else None},
@@ -480,7 +485,7 @@ def load_config_val(key: str,
        elif val.lower() in ('false', 'no', '0'):
            return False
        else:
            raise ValueError(f'Invalid configuration option {key}={val} (expected a boolean: True/False)')

    elif type is str:
        if val.lower() in ('true', 'false', 'yes', 'no', '1', '0'):
@@ -505,7 +510,7 @@ def load_config_file(out_dir: str=None) -> Optional[Dict[str, str]]:
    config_path = Path(out_dir) / CONFIG_FILENAME
    if config_path.exists():
        config_file = ConfigParser()
        config_file.optionxform = str
        config_file.read(config_path)
        # flatten into one namespace
        config_file_vars = {
@@ -529,7 +534,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
#
# You can add options here manually in INI format, or automatically by running:
#    archivebox config --set KEY=VALUE
#
# If you modify this file manually, make sure to update your archive after by running:
#    archivebox init
#
@@ -540,7 +545,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
    out_dir = out_dir or Path(os.getenv('OUTPUT_DIR', '.')).resolve()
    config_path = Path(out_dir) / CONFIG_FILENAME

    if not config_path.exists():
        atomic_write(config_path, CONFIG_HEADER)
@@ -578,7 +583,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
    with open(config_path, 'w+', encoding='utf-8') as new:
        config_file.write(new)

    try:
        # validate the config by attempting to re-parse it
        CONFIG = load_all_config()
@@ -591,20 +596,20 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
        if Path(f'{config_path}.bak').exists():
            os.remove(f'{config_path}.bak')

    return {
        key.upper(): CONFIG.get(key.upper())
        for key in config.keys()
    }


def load_config(defaults: ConfigDefaultDict,
                config: Optional[ConfigDict]=None,
                out_dir: Optional[str]=None,
                env_vars: Optional[os._Environ]=None,
                config_file_vars: Optional[Dict[str, str]]=None) -> ConfigDict:

    env_vars = env_vars or os.environ
    config_file_vars = config_file_vars or load_config_file(out_dir=out_dir)
@@ -634,7 +639,7 @@ def load_config(defaults: ConfigDefaultDict,
            stderr()
            # raise
            raise SystemExit(2)

    return extended_config

# def write_config(config: ConfigDict):
@@ -719,7 +724,7 @@ def bin_hash(binary: Optional[str]) -> Optional[str]:
    with io.open(abs_path, mode='rb') as f:
        for chunk in iter(lambda: f.read(io.DEFAULT_BUFFER_SIZE), b''):
            file_hash.update(chunk)

    return f'md5:{file_hash.hexdigest()}'

def find_chrome_binary() -> Optional[str]:
@@ -744,7 +749,7 @@ def find_chrome_binary() -> Optional[str]:
        full_path_exists = shutil.which(name)
        if full_path_exists:
            return name

    return None

def find_chrome_data_dir() -> Optional[str]:
@@ -1181,7 +1186,7 @@ def check_migrations(out_dir: Union[str, Path, None]=None, config: ConfigDict=CO
def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None:
    check_system_config()

    output_dir = out_dir or Path(config['OUTPUT_DIR'])
    assert isinstance(output_dir, Path) and isinstance(config['PACKAGE_DIR'], Path)

archivebox/core/middleware.py

@@ -1,8 +1,11 @@
__package__ = 'archivebox.core'

+import ipaddress
from django.utils import timezone
+from django.contrib.auth.middleware import RemoteUserMiddleware
+from django.core.exceptions import ImproperlyConfigured

-from ..config import PUBLIC_SNAPSHOTS
+from ..config import PUBLIC_SNAPSHOTS, REVERSE_PROXY_USER_HEADER, REVERSE_PROXY_WHITELIST


def detect_timezone(request, activate: bool=True):
@@ -35,3 +38,23 @@ def CacheControlMiddleware(get_response):
        return response

    return middleware

+
+class ReverseProxyAuthMiddleware(RemoteUserMiddleware):
+    header = 'HTTP_{normalized}'.format(normalized=REVERSE_PROXY_USER_HEADER.replace('-', '_').upper())
+
+    def process_request(self, request):
+        if REVERSE_PROXY_WHITELIST == '':
+            return
+
+        ip = request.META.get('REMOTE_ADDR')
+
+        for cidr in REVERSE_PROXY_WHITELIST.split(','):
+            try:
+                network = ipaddress.ip_network(cidr)
+            except ValueError:
+                raise ImproperlyConfigured(
+                    "The REVERSE_PROXY_WHITELIST config parameter is in an invalid format, or "
+                    "contains an invalid CIDR. Correct format is a comma-separated list of IPv4/IPv6 CIDRs.")
+
+            if ipaddress.ip_address(ip) in network:
+                return super().process_request(request)
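
As an aside (not part of the commit), the two moving parts of ReverseProxyAuthMiddleware are easy to check in isolation: Django exposes a request header such as 'Remote-User' as META['HTTP_REMOTE_USER'], and the whitelist is a comma-separated list of CIDRs matched against REMOTE_ADDR. A standalone sketch follows; the whitelist value is an example, not the default (which is empty, i.e. disabled).

import ipaddress

REVERSE_PROXY_USER_HEADER = 'Remote-User'              # default from config.py above
REVERSE_PROXY_WHITELIST = '172.16.0.0/12,10.0.0.0/8'   # example value; default is ''

# How the middleware derives its META key from the configured header name
header_key = 'HTTP_{}'.format(REVERSE_PROXY_USER_HEADER.replace('-', '_').upper())
print(header_key)  # -> HTTP_REMOTE_USER

def ip_is_whitelisted(remote_addr: str) -> bool:
    # True if remote_addr falls inside any CIDR range in the whitelist
    return any(
        ipaddress.ip_address(remote_addr) in ipaddress.ip_network(cidr)
        for cidr in REVERSE_PROXY_WHITELIST.split(',')
    )

print(ip_is_whitelisted('172.17.2.5'))    # True  (inside 172.16.0.0/12)
print(ip_is_whitelisted('203.0.113.9'))   # False (the trusted header will be ignored)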

archivebox/core/settings.py

@@ -34,7 +34,8 @@ WSGI_APPLICATION = 'core.wsgi.application'
ROOT_URLCONF = 'core.urls'

LOGIN_URL = '/accounts/login/'
-LOGOUT_REDIRECT_URL = '/'
+LOGOUT_REDIRECT_URL = os.environ.get('LOGOUT_REDIRECT_URL', '/')
PASSWORD_RESET_URL = '/accounts/password_reset/'

APPEND_SLASH = True
@@ -61,11 +62,13 @@ MIDDLEWARE = [
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
+   'core.middleware.ReverseProxyAuthMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'core.middleware.CacheControlMiddleware',
]

AUTHENTICATION_BACKENDS = [
+   'django.contrib.auth.backends.RemoteUserBackend',
    'django.contrib.auth.backends.ModelBackend',
]
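
Two ordering details in this last hunk are worth calling out: ReverseProxyAuthMiddleware sits after AuthenticationMiddleware (RemoteUserMiddleware requires it), and RemoteUserBackend is added alongside ModelBackend, so header-based logins work while normal password logins keep functioning. Below is a rough sketch of what the backend does with the trusted header value, assuming a configured Django project; the helper name is illustrative, not from the codebase.

from django.contrib.auth import authenticate

def user_from_proxy_header(request):
    # RemoteUserMiddleware/ReverseProxyAuthMiddleware hand the header value to
    # authenticate() as `remote_user`; RemoteUserBackend resolves it to a User
    # (creating one by default if it does not exist) without checking a password,
    # since the reverse proxy is trusted to have authenticated the request already.
    username = request.META.get('HTTP_REMOTE_USER')
    return authenticate(request, remote_user=username)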