From 972d57bd08870936ec2b3659cd7203a458805825 Mon Sep 17 00:00:00 2001 From: Cristian Date: Thu, 15 Oct 2020 08:42:46 -0500 Subject: [PATCH] feat: Add CURL_ARGS to control curl arguments --- archivebox/config/__init__.py | 8 +++++++- archivebox/extractors/archive_org.py | 6 ++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/archivebox/config/__init__.py b/archivebox/config/__init__.py index 98023d90..80107e0f 100644 --- a/archivebox/config/__init__.py +++ b/archivebox/config/__init__.py @@ -130,7 +130,12 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = { '--span-hosts', '--no-parent', '-e', 'robots=off', - ]} + ]}, + 'CURL_ARGS': {'type': list, 'default': ['--silent', + '--location', + '--head', + '--compressed' + ]} }, 'DEPENDENCY_CONFIG': { @@ -277,6 +282,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = { 'USE_CURL': {'default': lambda c: c['USE_CURL'] and (c['SAVE_FAVICON'] or c['SAVE_TITLE'] or c['SAVE_ARCHIVE_DOT_ORG'])}, 'CURL_VERSION': {'default': lambda c: bin_version(c['CURL_BINARY']) if c['USE_CURL'] else None}, 'CURL_USER_AGENT': {'default': lambda c: c['CURL_USER_AGENT'].format(**c)}, + 'CURL_ARGS': {'default': lambda c: c['CURL_ARGS'] or []}, 'SAVE_FAVICON': {'default': lambda c: c['USE_CURL'] and c['SAVE_FAVICON']}, 'SAVE_ARCHIVE_DOT_ORG': {'default': lambda c: c['USE_CURL'] and c['SAVE_ARCHIVE_DOT_ORG']}, diff --git a/archivebox/extractors/archive_org.py b/archivebox/extractors/archive_org.py index 016c3353..6ddd2133 100644 --- a/archivebox/extractors/archive_org.py +++ b/archivebox/extractors/archive_org.py @@ -13,6 +13,7 @@ from ..util import ( ) from ..config import ( TIMEOUT, + CURL_ARGS, CHECK_SSL_VALIDITY, SAVE_ARCHIVE_DOT_ORG, CURL_BINARY, @@ -45,10 +46,7 @@ def save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, timeout: int= submit_url = 'https://web.archive.org/save/{}'.format(link.url) cmd = [ CURL_BINARY, - '--silent', - '--location', - '--head', - '--compressed', + *CURL_ARGS, '--max-time', str(timeout), *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []), *([] if CHECK_SSL_VALIDITY else ['--insecure']),