fix fetch page title default

2024-06-23 08:30:29 +12:00 · 2019-01-11 05:19:22 -05:00 · 2019-01-11 05:19:22 -05:00 · d35c6cf8b5
parent 67d103a293
commit d35c6cf8b5
1 changed files with 7 additions and 3 deletions
--- a/archivebox/util.py
+++ b/archivebox/util.py
@@ -212,15 +212,19 @@ def download_url(url):
    return source_path


-def fetch_page_title(url, default=None):
+def fetch_page_title(url, default=True):
    """Attempt to guess a page's title by downloading the html"""
-    
+    if default is True:
+        default = url
+
    try:
        html_content = urllib.request.urlopen(url).read().decode('utf-8')

        match = re.search('<title>(.*?)</title>', html_content)
-        return match.group(1) if match else default
+        return match.group(1) if match else default or None
    except Exception:
+        if default is False:
+            raise
        return default