From 6a8f6f52afe6e822db7a1034b8b5d710204fa314 Mon Sep 17 00:00:00 2001
From: Nick Sweeting
Date: Sat, 30 Mar 2019 21:29:16 -0400
Subject: [PATCH] 0 mypy errors

---
 archivebox/index.py | 23 +++++++++++------------
 archivebox/logs.py  |  3 ++-
 archivebox/parse.py | 22 ++++++++++++----------
 archivebox/purge.py | 17 ++++++++---------
 archivebox/util.py  | 10 +++++-----
 5 files changed, 38 insertions(+), 37 deletions(-)

diff --git a/archivebox/index.py b/archivebox/index.py
index 3621b35e..d7e230a3 100644
--- a/archivebox/index.py
+++ b/archivebox/index.py
@@ -3,7 +3,7 @@ import json
 
 from datetime import datetime
 from string import Template
-from typing import List, Tuple, Iterator, Optional
+from typing import List, Tuple, Iterator, Optional, Mapping
 
 from .schema import Link, ArchiveResult
 from .config import (
@@ -132,8 +132,6 @@ def parse_json_links_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
 def write_html_links_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool=False) -> None:
     """write the html link index to a given path"""
 
-    path = os.path.join(out_dir, 'index.html')
-
     copy_and_overwrite(
         os.path.join(TEMPLATES_DIR, 'static'),
         os.path.join(out_dir, 'static'),
@@ -147,8 +145,9 @@ def write_html_links_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished:
     with open(os.path.join(TEMPLATES_DIR, 'index_row.html'), 'r', encoding='utf-8') as f:
         link_row_html = f.read()
 
-    link_rows = '\n'.join(
-        Template(link_row_html).substitute(**{
+    link_rows = []
+    for link in links:
+        template_row_vars: Mapping[str, str] = {
             **derived_link_info(link),
             'title': (
                 link.title
@@ -162,22 +161,22 @@ def write_html_links_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished:
             'archive_url': urlencode(
                 wget_output_path(link) or 'index.html'
             ),
-        })
-        for link in links
-    )
+        }
+        link_rows.append(Template(link_row_html).substitute(**template_row_vars))
 
-    template_vars = {
-        'num_links': len(links),
+    template_vars: Mapping[str, str] = {
+        'num_links': str(len(links)),
         'date_updated': datetime.now().strftime('%Y-%m-%d'),
         'time_updated': datetime.now().strftime('%Y-%m-%d %H:%M'),
         'footer_info': FOOTER_INFO,
         'version': VERSION,
         'git_sha': GIT_SHA,
-        'rows': link_rows,
+        'rows': '\n'.join(link_rows),
         'status': 'finished' if finished else 'running',
     }
+    template_html = Template(index_html).substitute(**template_vars)
 
-    atomic_write(Template(index_html).substitute(**template_vars), path)
+    atomic_write(template_html, os.path.join(out_dir, 'index.html'))
diff --git a/archivebox/logs.py b/archivebox/logs.py
index 155f81e6..d9b92422 100644
--- a/archivebox/logs.py
+++ b/archivebox/logs.py
@@ -111,6 +111,7 @@ def log_archiving_paused(num_links: int, idx: int, timestamp: str):
 def log_archiving_finished(num_links: int):
     end_ts = datetime.now()
     _LAST_RUN_STATS.archiving_end_ts = end_ts
+    assert _LAST_RUN_STATS.archiving_start_ts is not None
     seconds = end_ts.timestamp() - _LAST_RUN_STATS.archiving_start_ts.timestamp()
     if seconds > 60:
         duration = '{0:.2f} min'.format(seconds / 60, 2)
@@ -194,7 +195,7 @@ def log_archive_method_finished(result: ArchiveResult):
             ),
             *hints,
             '{}Run to see full output:{}'.format(ANSI['lightred'], ANSI['reset']),
-            *((' cd {};'.format(result.pwd),) if result.pwd else ()),
+            *([' cd {};'.format(result.pwd)] if result.pwd else []),
             ' {}'.format(quoted_cmd),
         ]
         print('\n'.join(
diff --git a/archivebox/parse.py b/archivebox/parse.py
index 5c5a6438..49ffa7fd 100644
--- a/archivebox/parse.py
+++ b/archivebox/parse.py
@@ -266,10 +266,12 @@ def parse_pinboard_rss_export(rss_file: IO[str]) -> Iterable[Link]:
     root = etree.parse(rss_file).getroot()
     items = root.findall("{http://purl.org/rss/1.0/}item")
     for item in items:
-        url = item.find("{http://purl.org/rss/1.0/}link").text
-        tags = item.find("{http://purl.org/dc/elements/1.1/}subject").text if item.find("{http://purl.org/dc/elements/1.1/}subject") else None
-        title = item.find("{http://purl.org/rss/1.0/}title").text.strip() if item.find("{http://purl.org/rss/1.0/}title").text.strip() else None
-        ts_str = item.find("{http://purl.org/dc/elements/1.1/}date").text if item.find("{http://purl.org/dc/elements/1.1/}date").text else None
+        find = lambda p: item.find(p).text.strip() if item.find(p) else None  # type: ignore
+
+        url = find("{http://purl.org/rss/1.0/}link")
+        tags = find("{http://purl.org/dc/elements/1.1/}subject")
+        title = find("{http://purl.org/rss/1.0/}title")
+        ts_str = find("{http://purl.org/dc/elements/1.1/}date")
 
         # Pinboard includes a colon in its date stamp timezone offsets, which
         # Python can't parse. Remove it:
@@ -296,12 +298,12 @@ def parse_medium_rss_export(rss_file: IO[str]) -> Iterable[Link]:
 
     rss_file.seek(0)
     root = etree.parse(rss_file).getroot()
-    items = root.find("channel").findall("item")
+    items = root.find("channel").findall("item")  # type: ignore
     for item in items:
-        url = item.find("link").text
-        title = item.find("title").text.strip()
-        ts_str = item.find("pubDate").text
-        time = datetime.strptime(ts_str, "%a, %d %b %Y %H:%M:%S %Z")
+        url = item.find("link").text  # type: ignore
+        title = item.find("title").text.strip()  # type: ignore
+        ts_str = item.find("pubDate").text  # type: ignore
+        time = datetime.strptime(ts_str, "%a, %d %b %Y %H:%M:%S %Z")  # type: ignore
 
         yield Link(
             url=htmldecode(url),
@@ -319,7 +321,7 @@ def parse_plain_text_export(text_file: IO[str]) -> Iterable[Link]:
     text_file.seek(0)
     for line in text_file.readlines():
         urls = re.findall(URL_REGEX, line) if line.strip() else ()
-        for url in urls:
+        for url in urls:  # type: ignore
             yield Link(
                 url=htmldecode(url),
                 timestamp=str(datetime.now().timestamp()),
diff --git a/archivebox/purge.py b/archivebox/purge.py
index 26b18817..ddc64b6b 100755
--- a/archivebox/purge.py
+++ b/archivebox/purge.py
@@ -6,9 +6,8 @@ from os.path import exists, join
 from shutil import rmtree
 from typing import List
 
-from archive import parse_json_link_index
-from config import ARCHIVE_DIR, OUTPUT_DIR
-from index import write_html_links_index, write_json_links_index
+from .config import ARCHIVE_DIR, OUTPUT_DIR
+from .index import parse_json_links_index, write_html_links_index, write_json_links_index
 
 
 def cleanup_index(regexes: List[str], proceed: bool, delete: bool) -> None:
@@ -16,18 +15,18 @@ def cleanup_index(regexes: List[str], proceed: bool, delete: bool) -> None:
         exit('index.json is missing; nothing to do')
 
     compiled = [re.compile(r) for r in regexes]
-    links = parse_json_link_index(OUTPUT_DIR)['links']
+    links = parse_json_links_index(OUTPUT_DIR)
     filtered = []
     remaining = []
 
-    for l in links:
-        url = l['url']
+    for link in links:
+        url = link.url
         for r in compiled:
             if r.search(url):
-                filtered.append((l, r))
+                filtered.append((link, r))
                 break
         else:
-            remaining.append(l)
+            remaining.append(link)
 
     if not filtered:
         exit('Search did not match any entries.')
@@ -35,7 +34,7 @@ def cleanup_index(regexes: List[str], proceed: bool, delete: bool) -> None:
 
     print('Filtered out {}/{} urls:'.format(len(filtered), len(links)))
     for link, regex in filtered:
-        url = link['url']
+        url = link.url
         print(' {url} via {regex}'.format(url=url, regex=regex.pattern))
 
     if not proceed:
diff --git a/archivebox/util.py b/archivebox/util.py
index 9c62526d..bc3fd1a0 100644
--- a/archivebox/util.py
+++ b/archivebox/util.py
@@ -7,7 +7,7 @@ import shutil
 
 from json import JSONEncoder
 from typing import List, Optional, Any, Union
-from inspect import signature, _empty
+from inspect import signature
 from functools import wraps
 from hashlib import sha256
 from urllib.request import Request, urlopen
@@ -24,7 +24,7 @@ from subprocess import (
     CalledProcessError,
 )
 
-from base32_crockford import encode as base32_encode
+from base32_crockford import encode as base32_encode  # type: ignore
 
 from .schema import Link
 from .config import (
@@ -127,9 +127,9 @@ def enforce_types(func):
             try:
                 annotation = sig.parameters[arg_key].annotation
             except KeyError:
-                annotation = _empty
+                annotation = None
 
-            if annotation is not _empty and annotation.__class__ is type:
+            if annotation is not None and annotation.__class__ is type:
                 if not isinstance(arg_val, annotation):
                     raise TypeError(
                         '{}(..., {}: {}) got unexpected {} argument {}={}'.format(
@@ -605,7 +605,7 @@ def download_url(url: str, timeout: int=TIMEOUT) -> str:
     insecure = ssl._create_unverified_context()
     resp = urlopen(req, timeout=timeout, context=insecure)
 
-    encoding = resp.headers.get_content_charset() or 'utf-8'
+    encoding = resp.headers.get_content_charset() or 'utf-8'  # type: ignore
     return resp.read().decode(encoding)