diff --git a/archivebox/archive_methods.py b/archivebox/archive_methods.py index 39903d8c..20e35c28 100644 --- a/archivebox/archive_methods.py +++ b/archivebox/archive_methods.py @@ -30,6 +30,7 @@ from config import ( SUBMIT_ARCHIVE_DOT_ORG, COOKIES_FILE, WGET_USER_AGENT, + CHROME_USER_AGENT, CHROME_USER_DATA_DIR, CHROME_HEADLESS, CHROME_SANDBOX, @@ -266,6 +267,7 @@ def fetch_pdf(link_dir, link, timeout=TIMEOUT, user_data_dir=CHROME_USER_DATA_DI '--hide-scrollbars', '--timeout={}'.format((timeout) * 1000), *(() if CHECK_SSL_VALIDITY else ('--disable-web-security', '--ignore-certificate-errors')), + *(('--user-agent={}'.format(CHROME_USER_AGENT),) if CHROME_USER_AGENT else ()), link['url'] ] end = progress(timeout, prefix=' ') @@ -304,6 +306,7 @@ def fetch_screenshot(link_dir, link, timeout=TIMEOUT, user_data_dir=CHROME_USER_ '--hide-scrollbars', '--timeout={}'.format((timeout) * 1000), *(() if CHECK_SSL_VALIDITY else ('--disable-web-security', '--ignore-certificate-errors')), + *(('--user-agent={}'.format(CHROME_USER_AGENT),) if CHROME_USER_AGENT else ()), # '--full-page', # TODO: make this actually work using ./bin/screenshot fullPage: true link['url'], ] @@ -342,6 +345,7 @@ def fetch_dom(link_dir, link, timeout=TIMEOUT, user_data_dir=CHROME_USER_DATA_DI *chrome_headless(user_data_dir=user_data_dir), '--dump-dom', '--timeout={}'.format((timeout) * 1000), + *(('--user-agent={}'.format(CHROME_USER_AGENT),) if CHROME_USER_AGENT else ()), link['url'] ] end = progress(timeout, prefix=' ') diff --git a/etc/ArchiveBox.conf.default b/etc/ArchiveBox.conf.default index ce7b1cda..dcb8aeac 100644 --- a/etc/ArchiveBox.conf.default +++ b/etc/ArchiveBox.conf.default @@ -40,7 +40,8 @@ #CHECK_SSL_VALIDITY=True #FETCH_WGET_REQUISITES=True #RESOLUTION="1440,900" -#WGET_USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36" +#WGET_USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36" +#CHROME_USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36" #GIT_DOMAINS="github.com,bitbucket.org,gitlab.com" #COOKIES_FILE="path/to/cookies.txt" #CHROME_USER_DATA_DIR="~/.config/google-chrome/Default"