
cleanup console logging messages

This commit is contained in:
Nick Sweeting 2018-04-17 03:22:59 -04:00
parent 1ea695d7b7
commit 2cd11feaac
4 changed files with 25 additions and 9 deletions

View file

@@ -4,6 +4,8 @@ from functools import wraps
from datetime import datetime
from subprocess import run, PIPE, DEVNULL
from peekable import Peekable
from index import html_appended_url, parse_json_link_index, write_link_index
from links import links_after_timestamp
from config import (
@@ -40,16 +42,19 @@ _RESULTS_TOTALS = { # globals are bad, mmkay
def archive_links(archive_path, links, source=None, resume=None):
check_dependencies()
to_archive = links_after_timestamp(links, resume)
to_archive = Peekable(links_after_timestamp(links, resume))
idx, link = 0, to_archive.peek(0)
try:
for idx, link in enumerate(to_archive):
link_dir = os.path.join(archive_path, link['timestamp'])
archive_link(link_dir, link)
except (KeyboardInterrupt, SystemExit, Exception) as e:
print('{red}[X] Index is up-to-date, archive update paused on link {idx}/{total}{reset}'.format(
print('⏸ [{now}] {lightyellow}Downloading paused on link {timestamp} ({idx}/{total}){reset}'.format(
**ANSI,
now=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
idx=idx,
timestamp=link['timestamp'],
total=len(list(to_archive)),
))
print(' Continue where you left off by running:')
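The pause message needs a current link even if the interrupt fires before the loop body has run once, which is why the generator from links_after_timestamp is wrapped in Peekable and peeked at index 0. The peekable module imported above ships with this repo; as a rough sketch only (not the actual implementation), a peek-able iterator wrapper can look like this:

class Peekable:
    """Wrap an iterator so upcoming items can be read without consuming them."""

    def __init__(self, iterable):
        self.iterator = iter(iterable)
        self.buffer = []                      # items peeked at but not yet yielded

    def peek(self, n=0):
        # fill the lookahead buffer up to index n (raises StopIteration if exhausted)
        while len(self.buffer) <= n:
            self.buffer.append(next(self.iterator))
        return self.buffer[n]

    def __iter__(self):
        return self

    def __next__(self):
        return self.buffer.pop(0) if self.buffer else next(self.iterator)

With that interface, to_archive.peek(0) returns the first pending link without advancing the iteration that the for loop later consumes.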

View file

@@ -28,15 +28,15 @@ def write_links_index(out_dir, links):
if not os.path.exists(out_dir):
os.makedirs(out_dir)
print('[i] [{}] Updating {}{}{} links in archive index...'.format(
write_json_links_index(out_dir, links)
write_html_links_index(out_dir, links)
print('[√] [{}] Archive index is now up-to-date with {}{}{} links.'.format(
datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
ANSI['green'],
len(links),
ANSI['reset'],
))
write_json_links_index(out_dir, links)
write_html_links_index(out_dir, links)
def write_json_links_index(out_dir, links):
"""write the json link index to a given path"""

View file

@@ -32,6 +32,8 @@ Link {
"""
import datetime
from util import (
domain,
base_url,
@@ -39,6 +41,7 @@ from util import (
get_link_type,
merge_links,
)
from config import ANSI
def validate_links(links):
@@ -95,7 +98,11 @@ def links_after_timestamp(links, timestamp=None):
yield from links
return
print('[.] [{}] Resuming...'.format(timestamp))
print('▶️ [{}] {green}Resuming downloads at {}...{reset}'.format(
datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
timestamp,
**ANSI,
))
for link in links:
try:
if float(link['timestamp']) <= float(timestamp):
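links_after_timestamp is the generator behind resume: with no timestamp it passes every link through, otherwise it announces the resume point and presumably drops every link at or before it. The hunk is cut off inside the loop; a self-contained sketch of that filtering behavior (illustrative, not the exact code from this commit):

def links_after_timestamp(links, timestamp=None):
    """Yield only the links whose timestamp falls after the resume point."""
    if timestamp is None:
        yield from links
        return

    for link in links:
        try:
            if float(link['timestamp']) > float(timestamp):
                yield link
        except (ValueError, TypeError):
            # unparseable timestamps can't be compared against the resume point
            print('Skipping link with invalid timestamp: {}'.format(link.get('timestamp')))

# e.g. resume at 1523948579: everything at or before that moment is skipped
links = [{'timestamp': '1523948578'}, {'timestamp': '1523948580'}]
print(list(links_after_timestamp(links, '1523948579')))   # -> [{'timestamp': '1523948580'}]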

View file

@@ -184,6 +184,10 @@ def parse_pinboard_rss_feed(rss_file):
tags = item.find("{http://purl.org/dc/elements/1.1/}subject").text
title = item.find("{http://purl.org/rss/1.0/}title").text
ts_str = item.find("{http://purl.org/dc/elements/1.1/}date").text
# Pinboard includes a colon in its date stamp timezone offsets, which
# Python can't parse. Remove it:
if ":" == ts_str[-3:-2]:
@@ -208,8 +212,8 @@ def parse_medium_rss_feed(rss_file):
root = etree.parse(rss_file).getroot()
items = root.find("channel").findall("item")
for item in items:
for child in item:
print(child.tag, child.text)
# for child in item:
# print(child.tag, child.text)
url = item.find("link").text
title = item.find("title").text
ts_str = item.find("pubDate").text