From c12fe0e3d772f2d68ed65253b2cf04f15d034c20 Mon Sep 17 00:00:00 2001 From: Cristian Date: Thu, 15 Oct 2020 08:58:22 -0500 Subject: [PATCH] feat: Use CURL_ARGS on title extractor --- archivebox/config/__init__.py | 3 ++- archivebox/config/stubs.py | 2 ++ archivebox/extractors/git.py | 3 ++- archivebox/extractors/title.py | 5 ++--- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/archivebox/config/__init__.py b/archivebox/config/__init__.py index 390c5539..c661a1d6 100644 --- a/archivebox/config/__init__.py +++ b/archivebox/config/__init__.py @@ -134,7 +134,8 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = { 'CURL_ARGS': {'type': list, 'default': ['--silent', '--location', '--compressed' - ]} + ]}, + 'GIT_ARGS': {'type': list, 'default': ['--recursive']}, }, 'DEPENDENCY_CONFIG': { diff --git a/archivebox/config/stubs.py b/archivebox/config/stubs.py index ae66540e..bdeb831d 100644 --- a/archivebox/config/stubs.py +++ b/archivebox/config/stubs.py @@ -96,6 +96,8 @@ class ConfigDict(BaseConfig, total=False): YOUTUBEDL_ARGS: Optional[str] WGET_ARGS: Optional[str] + CURL_ARGS: Optional[str] + GIT_ARGS: Optional[str] ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue] diff --git a/archivebox/extractors/git.py b/archivebox/extractors/git.py index f054b222..fd20d4b6 100644 --- a/archivebox/extractors/git.py +++ b/archivebox/extractors/git.py @@ -18,6 +18,7 @@ from ..config import ( TIMEOUT, SAVE_GIT, GIT_BINARY, + GIT_ARGS, GIT_VERSION, GIT_DOMAINS, CHECK_SSL_VALIDITY @@ -56,7 +57,7 @@ def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> cmd = [ GIT_BINARY, 'clone', - '--recursive', + *GIT_ARGS, *([] if CHECK_SSL_VALIDITY else ['-c', 'http.sslVerify=false']), without_query(without_fragment(link.url)), ] diff --git a/archivebox/extractors/title.py b/archivebox/extractors/title.py index 7a5cd471..75068456 100644 --- a/archivebox/extractors/title.py +++ b/archivebox/extractors/title.py @@ -16,6 +16,7 @@ from ..config import ( CHECK_SSL_VALIDITY, SAVE_TITLE, CURL_BINARY, + CURL_ARGS, CURL_VERSION, CURL_USER_AGENT, setup_django, @@ -51,10 +52,8 @@ def save_title(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) - output: ArchiveOutput = None cmd = [ CURL_BINARY, - '--silent', + *CURL_ARGS, '--max-time', str(timeout), - '--location', - '--compressed', *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []), *([] if CHECK_SSL_VALIDITY else ['--insecure']), link.url,