From 2cd11feaac6f4e34fdee2721eae6620573506354 Mon Sep 17 00:00:00 2001
From: Nick Sweeting
Date: Tue, 17 Apr 2018 03:22:59 -0400
Subject: [PATCH] cleanup console logging messages

---
 archive_methods.py | 9 +++++++--
 index.py           | 8 ++++----
 links.py           | 9 ++++++++-
 parse.py           | 8 ++++++--
 4 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/archive_methods.py b/archive_methods.py
index 98bf63f9..22d96cdb 100644
--- a/archive_methods.py
+++ b/archive_methods.py
@@ -4,6 +4,8 @@ from functools import wraps
 from datetime import datetime
 from subprocess import run, PIPE, DEVNULL
 
+from peekable import Peekable
+
 from index import html_appended_url, parse_json_link_index, write_link_index
 from links import links_after_timestamp
 from config import (
@@ -40,16 +42,19 @@ _RESULTS_TOTALS = {  # globals are bad, mmkay
 def archive_links(archive_path, links, source=None, resume=None):
     check_dependencies()
 
-    to_archive = links_after_timestamp(links, resume)
+    to_archive = Peekable(links_after_timestamp(links, resume))
+    idx, link = 0, to_archive.peek(0)
     try:
         for idx, link in enumerate(to_archive):
             link_dir = os.path.join(archive_path, link['timestamp'])
             archive_link(link_dir, link)
 
     except (KeyboardInterrupt, SystemExit, Exception) as e:
-        print('{red}[X] Index is up-to-date, archive update paused on link {idx}/{total}{reset}'.format(
+        print('⏸ [{now}] {lightyellow}Downloading paused on link {timestamp} ({idx}/{total}){reset}'.format(
             **ANSI,
+            now=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
             idx=idx,
+            timestamp=link['timestamp'],
             total=len(list(to_archive)),
         ))
         print('    Continue where you left off by running:')
diff --git a/index.py b/index.py
index 42b1c85f..3882e1a8 100644
--- a/index.py
+++ b/index.py
@@ -28,15 +28,15 @@ def write_links_index(out_dir, links):
     if not os.path.exists(out_dir):
         os.makedirs(out_dir)
 
-    print('[i] [{}] Updating {}{}{} links in archive index...'.format(
+    write_json_links_index(out_dir, links)
+    write_html_links_index(out_dir, links)
+
+    print('[√] [{}] Archive index is now up-to-date with {}{}{} links.'.format(
         datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
         ANSI['green'],
         len(links),
         ANSI['reset'],
     ))
-
-    write_json_links_index(out_dir, links)
-    write_html_links_index(out_dir, links)
 
 def write_json_links_index(out_dir, links):
     """write the json link index to a given path"""
diff --git a/links.py b/links.py
index 4365469c..45143089 100644
--- a/links.py
+++ b/links.py
@@ -32,6 +32,8 @@ Link {
 
 """
 
+import datetime
+
 from util import (
     domain,
     base_url,
@@ -39,6 +41,7 @@ from util import (
     get_link_type,
     merge_links,
 )
+from config import ANSI
 
 
 def validate_links(links):
@@ -95,7 +98,11 @@ def links_after_timestamp(links, timestamp=None):
         yield from links
         return
 
-    print('[.] [{}] Resuming...'.format(timestamp))
+    print('▶️ [{}] {green}Resuming downloads at {}...{reset}'.format(
+        datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+        timestamp,
+        **ANSI,
+    ))
     for link in links:
         try:
             if float(link['timestamp']) <= float(timestamp):
diff --git a/parse.py b/parse.py
index 923ecbed..682ee403 100644
--- a/parse.py
+++ b/parse.py
@@ -184,6 +184,10 @@ def parse_pinboard_rss_feed(rss_file):
         tags = item.find("{http://purl.org/dc/elements/1.1/}subject").text
         title = item.find("{http://purl.org/rss/1.0/}title").text
         ts_str = item.find("{http://purl.org/dc/elements/1.1/}date").text
+
+        # Pinboard includes a colon in its date stamp timezone offsets, which
+        # Python can't parse.
+        # Remove it:
         if ":" == ts_str[-3:-2]:
             ts_str = ts_str[:-3]+ts_str[-2:]
@@ -208,8 +212,8 @@ def parse_medium_rss_feed(rss_file):
     root = etree.parse(rss_file).getroot()
     items = root.find("channel").findall("item")
     for item in items:
-        for child in item:
-            print(child.tag, child.text)
+        # for child in item:
+        #     print(child.tag, child.text)
         url = item.find("link").text
         title = item.find("title").text
         ts_str = item.find("pubDate").text
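Reviewer note: archive_links() now relies on a Peekable wrapper imported from the repo's local peekable module, which is not included in this patch. The sketch below is a hypothetical stand-in showing only the interface the patch actually uses (normal iteration plus peek(0) to read the first pending link without consuming it); the real module may be implemented differently.

# Hypothetical sketch of the Peekable wrapper; the real peekable.py may differ.
# It lets archive_links() report the first pending link in the paused message
# without pulling it out of the underlying links_after_timestamp() generator.
class Peekable:
    def __init__(self, iterable):
        self.iterator = iter(iterable)
        self.buffer = []                  # items read ahead but not yet yielded

    def peek(self, n=0):
        """Return the item n positions ahead without consuming it."""
        while len(self.buffer) <= n:
            self.buffer.append(next(self.iterator))
        return self.buffer[n]

    def __iter__(self):
        return self

    def __next__(self):
        if self.buffer:
            return self.buffer.pop(0)
        return next(self.iterator)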
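On the comment block restored in parse.py: Pinboard's RSS dates carry a colon inside the timezone offset (e.g. -04:00), which strptime's %z directive rejects on the Python 3 versions current at the time (before 3.7). A standalone illustration of the same slicing trick, using a made-up timestamp:

from datetime import datetime

# Made-up Pinboard-style timestamp with a colon in the timezone offset:
ts_str = "2018-04-17T03:22:59-04:00"

# Same slicing as parse.py: drop the colon from the trailing offset so that
# strptime's %z directive can parse it on pre-3.7 Pythons.
if ":" == ts_str[-3:-2]:
    ts_str = ts_str[:-3] + ts_str[-2:]    # -> "2018-04-17T03:22:59-0400"

print(datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%S%z"))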