From d35c6cf8b56f796148cd39a81a5fce2413f977d4 Mon Sep 17 00:00:00 2001
From: Nick Sweeting
Date: Fri, 11 Jan 2019 05:19:22 -0500
Subject: [PATCH] fix fetch page title default

---
 archivebox/util.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/archivebox/util.py b/archivebox/util.py
index 681218fe..265815ff 100644
--- a/archivebox/util.py
+++ b/archivebox/util.py
@@ -212,15 +212,19 @@ def download_url(url):
     return source_path
 
 
-def fetch_page_title(url, default=None):
+def fetch_page_title(url, default=True):
     """Attempt to guess a page's title by downloading the html"""
-
+    if default is True:
+        default = url
+
     try:
         html_content = urllib.request.urlopen(url).read().decode('utf-8')
 
         match = re.search('<title>(.*?)</title>', html_content)
-        return match.group(1) if match else default
+        return match.group(1) if match else default or None
     except Exception:
+        if default is False:
+            raise
         return default
 
 