diff --git a/archivebox/util.py b/archivebox/util.py index 2a5dc468..1dbb9f21 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -43,7 +43,7 @@ base_url = lambda url: without_scheme(url) # uniq base url used to dedupe links short_ts = lambda ts: ts.split('.')[0] -URL_REGEX = 'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+' +URL_REGEX = 'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))[^<]+' def check_dependencies():