From 0965031d8f5c86e3e89352800f02e38a1a194133 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Wed, 22 Jul 2020 01:46:38 -0400 Subject: [PATCH] fix archive_org header rename --- archivebox/extractors/archive_org.py | 2 +- archivebox/extractors/wget.py | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/archivebox/extractors/archive_org.py b/archivebox/extractors/archive_org.py index 656beb25..77cde22d 100644 --- a/archivebox/extractors/archive_org.py +++ b/archivebox/extractors/archive_org.py @@ -106,7 +106,7 @@ def parse_archive_dot_org_response(response: bytes) -> Tuple[List[str], List[str headers[name.lower().strip()].append(val.strip()) # Get successful archive url in "content-location" header or any errors - content_location = headers['content-location'] + content_location = headers.get('content-location', headers['location']) errors = headers['x-archive-wayback-runtime-error'] return content_location, errors diff --git a/archivebox/extractors/wget.py b/archivebox/extractors/wget.py index 3221d8bd..0e6cdafa 100644 --- a/archivebox/extractors/wget.py +++ b/archivebox/extractors/wget.py @@ -97,21 +97,20 @@ def save_wget(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT) -> if 'Downloaded:' in output_tail[-1] else 0 ) + hints = ( + 'Got wget response code: {}.'.format(result.returncode), + *output_tail, + ) # Check for common failure cases - if result.returncode > 0 and files_downloaded < 1: - hints = ( - 'Got wget response code: {}.'.format(result.returncode), - *output_tail, - ) + if (result.returncode > 0 and files_downloaded < 1) or output is None: if b'403: Forbidden' in result.stderr: raise ArchiveError('403 Forbidden (try changing WGET_USER_AGENT)', hints) if b'404: Not Found' in result.stderr: raise ArchiveError('404 Not Found', hints) if b'ERROR 500: Internal Server Error' in result.stderr: raise ArchiveError('500 Internal Server Error', hints) - raise ArchiveError('Got an error from the server', hints) - + raise ArchiveError('Wget failed or got an error from the server', hints) chmod_file(output, cwd=out_dir) except Exception as err: status = 'failed'