# Pattern for pulling the text of a page's <title> element out of raw HTML.
# Capture group 1 holds the title text exactly as it appears in the markup
# (surrounding whitespace and newlines are NOT stripped). A failed search
# means the document has no <title> element with non-empty text.
#
# The opening tag is matched with `\b[^>]*` rather than `.*?` so that:
#   * look-alike tags such as <titlebar> are not mistaken for <title>, and
#   * the tag match can never stretch past its own closing `>` and pick up
#     unrelated text from later in the document.
# The capture group is `[^<>]+` with no leading `.`: under re.DOTALL a bare
# `.` also matches `<`, which made an empty <title></title> yield "</title"
# and made one-character titles fail to match at all.
HTML_TITLE_REGEX = re.compile(
    r'<title\b[^>]*>'                  # opening <title> tag, attributes allowed
    r'([^<>]+)',                       # title text: everything up to the next tag
    # MULTILINE/DOTALL are kept as the patch set them; the pattern no longer
    # contains `.`, `^` or `$`, so only IGNORECASE affects matching here.
    re.IGNORECASE | re.MULTILINE | re.DOTALL | re.UNICODE,
)