From 127c72bd79349f9fad805dbd7f5434b77736961d Mon Sep 17 00:00:00 2001 From: noncetonic Date: Tue, 19 Mar 2019 05:30:06 -0700 Subject: [PATCH] Adds HEADLESS_USER_AGENT variable Allows setting Headless Chrome's User-Agent to bypass rudimentary anti-scraper/anti-bot checks by sites. https://intoli.com/blog/making-chrome-headless-undetectable/ describes further detection techniques, if there is a desire to get serious about anti-detection --- etc/ArchiveBox.conf.default | 1 + 1 file changed, 1 insertion(+) diff --git a/etc/ArchiveBox.conf.default b/etc/ArchiveBox.conf.default index ce7b1cda..67bc6b2d 100644 --- a/etc/ArchiveBox.conf.default +++ b/etc/ArchiveBox.conf.default @@ -41,6 +41,7 @@ #FETCH_WGET_REQUISITES=True #RESOLUTION="1440,900" #WGET_USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36" +#HEADLESS_USER_AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36" #GIT_DOMAINS="github.com,bitbucket.org,gitlab.com" #COOKIES_FILE="path/to/cookies.txt" #CHROME_USER_DATA_DIR="~/.config/google-chrome/Default"