diff --git a/archivebox/config.py b/archivebox/config.py index 1edd2eeb..fad2db53 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -500,7 +500,7 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = { 'LOGS_DIR': {'default': lambda c: c['OUTPUT_DIR'] / LOGS_DIR_NAME}, 'CONFIG_FILE': {'default': lambda c: Path(c['CONFIG_FILE']).resolve() if c['CONFIG_FILE'] else c['OUTPUT_DIR'] / CONFIG_FILENAME}, 'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and Path(c['COOKIES_FILE']).resolve()}, - 'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (Path(c['CHROME_USER_DATA_DIR']).resolve() if c['CHROME_USER_DATA_DIR'] else None)}, # None means unset, so we autodetect it with find_chrome_Data_dir(), but emptystring '' means user manually set it to '', and we should store it as None + 'CHROME_USER_DATA_DIR': {'default': lambda c: Path(c['CHROME_USER_DATA_DIR']).resolve() if c['CHROME_USER_DATA_DIR'] else None}, 'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)}, 'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)}, 'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')}, # exec is always needed to list directories @@ -910,27 +910,36 @@ def find_chrome_binary() -> Optional[str]: def find_chrome_data_dir() -> Optional[str]: """find any installed chrome user data directories in the default locations""" - # Precedence: Chromium, Chrome, Beta, Canary, Unstable, Dev - # make sure data dir finding precedence order always matches binary finding order - default_profile_paths = ( - '~/.config/chromium', - '~/Library/Application Support/Chromium', - '~/AppData/Local/Chromium/User Data', - '~/.config/chrome', - '~/.config/google-chrome', - '~/Library/Application Support/Google/Chrome', - 
'~/AppData/Local/Google/Chrome/User Data', - '~/.config/google-chrome-stable', - '~/.config/google-chrome-beta', - '~/Library/Application Support/Google/Chrome Canary', - '~/AppData/Local/Google/Chrome SxS/User Data', - '~/.config/google-chrome-unstable', - '~/.config/google-chrome-dev', - ) - for path in default_profile_paths: - full_path = Path(path).resolve() - if full_path.exists(): - return full_path + # Deprecated because this is DANGEROUS; do not re-implement or uncomment this behavior. + + # Going forward, we want to discourage people from using their main Chrome profile for archiving. + # Session tokens, personal data, and cookies are often returned in server responses; + # once those responses are archived, the credentials are effectively burned, because anyone who can view the archive + # can use that data to masquerade as the logged-in user who did the archiving. + # For this reason, users should always create dedicated burner profiles for archiving and not use + # their daily-driver main accounts. + + # # Precedence: Chromium, Chrome, Beta, Canary, Unstable, Dev + # # make sure data dir finding precedence order always matches binary finding order + # default_profile_paths = ( + # '~/.config/chromium', + # '~/Library/Application Support/Chromium', + # '~/AppData/Local/Chromium/User Data', + # '~/.config/chrome', + # '~/.config/google-chrome', + # '~/Library/Application Support/Google/Chrome', + # '~/AppData/Local/Google/Chrome/User Data', + # '~/.config/google-chrome-stable', + # '~/.config/google-chrome-beta', + # '~/Library/Application Support/Google/Chrome Canary', + # '~/AppData/Local/Google/Chrome SxS/User Data', + # '~/.config/google-chrome-unstable', + # '~/.config/google-chrome-dev', + # ) + # for path in default_profile_paths: + # full_path = Path(path).resolve() + # if full_path.exists(): + # return full_path return None def wget_supports_compression(config):