From d6de04a83ad0963c1b36209e124a66358d09aab6 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sat, 30 Jan 2021 06:07:35 -0500 Subject: [PATCH] fix lgtm errors --- archivebox/core/settings.py | 2 ++ archivebox/extractors/favicon.py | 3 +-- archivebox/index/__init__.py | 2 +- archivebox/parsers/generic_txt.py | 4 ++-- archivebox/parsers/wallabag_atom.py | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index bcf9c073..918e15e9 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -33,6 +33,8 @@ LOGOUT_REDIRECT_URL = '/' PASSWORD_RESET_URL = '/accounts/password_reset/' APPEND_SLASH = True +DEBUG = DEBUG or os.environ.get('DEBUG', 'false').lower() != 'false' or '--debug' in sys.argv + INSTALLED_APPS = [ 'django.contrib.auth', 'django.contrib.contenttypes', diff --git a/archivebox/extractors/favicon.py b/archivebox/extractors/favicon.py index 3a4aeea7..b8831d0c 100644 --- a/archivebox/extractors/favicon.py +++ b/archivebox/extractors/favicon.py @@ -42,14 +42,13 @@ def save_favicon(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) *([] if CHECK_SSL_VALIDITY else ['--insecure']), 'https://www.google.com/s2/favicons?domain={}'.format(domain(link.url)), ] - status = 'pending' + status = 'failed' timer = TimedProgress(timeout, prefix='      ') try: run(cmd, cwd=str(out_dir), timeout=timeout) chmod_file(output, cwd=str(out_dir)) status = 'succeeded' except Exception as err: - status = 'failed' output = err finally: timer.end() diff --git a/archivebox/index/__init__.py b/archivebox/index/__init__.py index 8eab1d38..04ab0a8d 100644 --- a/archivebox/index/__init__.py +++ b/archivebox/index/__init__.py @@ -2,7 +2,6 @@ __package__ = 'archivebox.index' import os import shutil -import json as pyjson from pathlib import Path from itertools import chain @@ -42,6 +41,7 @@ from .html import ( write_html_link_details, ) from .json import ( + pyjson, parse_json_link_details, 
write_json_link_details, ) diff --git a/archivebox/parsers/generic_txt.py b/archivebox/parsers/generic_txt.py index e296ec7e..94dd523c 100644 --- a/archivebox/parsers/generic_txt.py +++ b/archivebox/parsers/generic_txt.py @@ -51,9 +51,9 @@ def parse_generic_txt_export(text_file: IO[str], **_kwargs) -> Iterable[Link]: # look inside the URL for any sub-urls, e.g. for archive.org links # https://web.archive.org/web/20200531203453/https://www.reddit.com/r/socialism/comments/gu24ke/nypd_officers_claim_they_are_protecting_the_rule/fsfq0sw/ # -> https://www.reddit.com/r/socialism/comments/gu24ke/nypd_officers_claim_they_are_protecting_the_rule/fsfq0sw/ - for url in re.findall(URL_REGEX, line[1:]): + for sub_url in re.findall(URL_REGEX, line[1:]): yield Link( - url=htmldecode(url), + url=htmldecode(sub_url), timestamp=str(datetime.now().timestamp()), title=None, tags=None, diff --git a/archivebox/parsers/wallabag_atom.py b/archivebox/parsers/wallabag_atom.py index 0d77869f..7acfc2fc 100644 --- a/archivebox/parsers/wallabag_atom.py +++ b/archivebox/parsers/wallabag_atom.py @@ -45,7 +45,7 @@ def parse_wallabag_atom_export(rss_file: IO[str], **_kwargs) -> Iterable[Link]: time = datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%S%z") try: tags = str_between(get_row('category'), 'label="', '" />') - except: + except Exception: tags = None yield Link(