diff --git a/archivebox/config.py b/archivebox/config.py index 0111ebcd..122c7e8d 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -32,7 +32,7 @@ SUBMIT_ARCHIVE_DOT_ORG = os.getenv('SUBMIT_ARCHIVE_DOT_ORG', 'True' CHECK_SSL_VALIDITY = os.getenv('CHECK_SSL_VALIDITY', 'True' ).lower() == 'true' RESOLUTION = os.getenv('RESOLUTION', '1440,2000' ) GIT_DOMAINS = os.getenv('GIT_DOMAINS', 'github.com,bitbucket.org,gitlab.com').split(',') -WGET_USER_AGENT = os.getenv('WGET_USER_AGENT', 'ArchiveBox/{GIT_SHA} (+https://github.com/pirate/ArchiveBox/) wget/{WGET_VERSION}') # OR: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36 +WGET_USER_AGENT = os.getenv('WGET_USER_AGENT', 'ArchiveBox/{GIT_SHA} (+https://github.com/pirate/ArchiveBox/) wget/{WGET_VERSION}') CHROME_USER_DATA_DIR = os.getenv('CHROME_USER_DATA_DIR', None) CHROME_BINARY = os.getenv('CHROME_BINARY', None) # change to google-chrome browser if using google-chrome diff --git a/etc/ArchiveBox.conf.default b/etc/ArchiveBox.conf.default index 5e5f73f5..fc07363e 100644 --- a/etc/ArchiveBox.conf.default +++ b/etc/ArchiveBox.conf.default @@ -1,33 +1,48 @@ # Config file uses python syntax but can also be sourced as a .env -ONLY_NEW=False +### To disable specific archive methods, uncomment the line and change it to False +# FETCH_WGET=True +# FETCH_WGET_REQUISITES=True +# FETCH_PDF=True +# FETCH_SCREENSHOT=True +# FETCH_DOM=True +# FETCH_WARC=False +# FETCH_MEDIA=False +# FETCH_GIT=True +# FETCH_FAVICON=True +# SUBMIT_ARCHIVE_DOT_ORG=True -OUTPUT_PERMISSIONS=755 -MEDIA_TIMEOUT=3600 -TIMEOUT=60 +### To only download new links, and never attempt to update old ones, uncomment this line: +# ONLY_NEW=True -FETCH_WGET=True +### To change your output folder permissions, uncomment and edit this line: +# OUTPUT_PERMISSIONS=755 -FETCH_WGET=True -FETCH_WGET_REQUISITES=True -FETCH_PDF=True -FETCH_SCREENSHOT=True -FETCH_DOM=True -FETCH_WARC=False 
-FETCH_GIT=True -FETCH_MEDIA=False -FETCH_FAVICON=True -SUBMIT_ARCHIVE_DOT_ORG=True +### To change how much time is allowed before marking a link as failed, uncomment and edit these lines: +# MEDIA_TIMEOUT=3600 +# TIMEOUT=60 -CHECK_SSL_VALIDITY=True -RESOLUTION="1440,2000" -GIT_DOMAINS="github.com,bitbucket.org,gitlab.com" -WGET_USER_AGENT="ArchiveBox/{GIT_SHA} (+https://github.com/pirate/ArchiveBox/) wget/{WGET_VERSION}" +### To ignore SSL certificate errors, uncomment this line: +# CHECK_SSL_VALIDITY=False +### To customize the screenshot resolution, uncomment and change this line: +# RESOLUTION="1440,2000" -CHROME_BINARY="chromium-browser" -CHROME_USER_DATA_DIR=None +### To customize which domains we attempt to `git clone`, uncomment and change this line: +# GIT_DOMAINS="github.com,bitbucket.org,gitlab.com" -WGET_BINARY="wget" +### To lie and pretend to be Google Chrome when archiving, uncomment this line: +# WGET_USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36" + +### To share a chrome data directory with your main browser, uncomment this line: +# CHROME_USER_DATA_DIR="~/Library/Application Support/Google/Chrome/Default" + +### To use a custom chrome binary, uncomment and set to binary name or full path: +# CHROME_BINARY="chromium-browser" + +### To use a custom wget binary, uncomment and set to binary name or full path: +# WGET_BINARY="wget" + +### To customize the text displayed in the HTML output footer, uncomment and edit this line: +# FOOTER_INFO="Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests." -FOOTER_INFO="Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests."