From ee93807a0a9231ac0c358847cb8dce0b5d77e1db Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 4 Feb 2019 22:07:48 -0800 Subject: [PATCH] tweak wording of parser cli output --- archivebox/archive.py | 7 +++---- archivebox/parse.py | 37 +++++++++++++++++++++++-------------- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/archivebox/archive.py b/archivebox/archive.py index aeddf8b3..eeee6971 100755 --- a/archivebox/archive.py +++ b/archivebox/archive.py @@ -67,14 +67,13 @@ def merge_links(archive_path=OUTPUT_DIR, import_path=None, only_new=False): if archive_path: existing_links = parse_json_links_index(archive_path) all_links = validate_links(existing_links + all_links) - + num_new_links = len(all_links) - len(existing_links) if num_new_links and not only_new: - print('[{green}+{reset}] [{}] Adding {} new links from {} to {}/index.json (detected {} format)'.format( + print('{green}[+] [{}] Adding {} new links to index from {} ({} format){reset}'.format( datetime.now().strftime('%Y-%m-%d %H:%M:%S'), num_new_links, pretty_path(import_path), - pretty_path(archive_path), parser_name, **ANSI, )) @@ -103,7 +102,7 @@ def update_archive(archive_path, links, source=None, resume=None, append=True): **ANSI, )) else: - print('{green}[▶] [{}] Updating files for {} links in archive...{reset}'.format( + print('{green}[▶] [{}] Downloading content for {} pages in archive...{reset}'.format( datetime.now().strftime('%Y-%m-%d %H:%M:%S'), len(links), **ANSI, diff --git a/archivebox/parse.py b/archivebox/parse.py index ea9579b7..ca7c9628 100644 --- a/archivebox/parse.py +++ b/archivebox/parse.py @@ -25,6 +25,7 @@ import xml.etree.ElementTree as etree from datetime import datetime +from config import ANSI from util import ( domain, base_url, @@ -39,14 +40,14 @@ def get_parsers(file): """return all parsers that work on a given file, defaults to all of them""" return OrderedDict([ - ('pocket', parse_pocket_export), - ('pinboard', parse_json_export), - ('bookmarks', parse_bookmarks_export), - ('rss', parse_rss_export), - ('pinboard_rss', parse_pinboard_rss_feed), - ('shaarli_rss', parse_shaarli_rss_export), - ('medium_rss', parse_medium_rss_feed), - ('plain_text', parse_plain_text), + ('Pocket HTML', parse_pocket_html_export), + ('Pinboard JSON', parse_pinboard_json_export), + ('Netscape HTML', parse_netscape_html_export), + ('RSS', parse_rss_export), + ('Pinboard RSS', parse_pinboard_rss_export), + ('Shaarli RSS', parse_shaarli_rss_export), + ('Medium RSS', parse_medium_rss_export), + ('Plain Text', parse_plain_text_export), ]) def parse_links(path): @@ -54,6 +55,12 @@ def parse_links(path): links = [] with open(path, 'r', encoding='utf-8') as file: + print('{green}[*] [{}] Parsing new links from output/sources/{} and fetching titles...{reset}'.format( + datetime.now().strftime('%Y-%m-%d %H:%M:%S'), + path.rsplit('/', 1)[-1], + **ANSI, + )) + for parser_name, parser_func in get_parsers(file).items(): # otherwise try all parsers until one works try: @@ -64,10 +71,12 @@ def parse_links(path): # parser not supported on this file pass + print() + return links, parser_name -def parse_pocket_export(html_file): +def parse_pocket_html_export(html_file): """Parse Pocket-format bookmarks export files (produced by getpocket.com/export/)""" html_file.seek(0) @@ -91,7 +100,7 @@ def parse_pocket_export(html_file): info['type'] = get_link_type(info) yield info -def parse_json_export(json_file): +def parse_pinboard_json_export(json_file): """Parse JSON-format bookmarks export files (produced by pinboard.in/export/, or wallabag)""" json_file.seek(0) json_content = json.load(json_file) @@ -210,7 +219,7 @@ def parse_shaarli_rss_export(rss_file): yield info -def parse_bookmarks_export(html_file): +def parse_netscape_html_export(html_file): """Parse netscape-format bookmarks export files (produced by all browsers)""" html_file.seek(0) @@ -237,7 +246,7 @@ def parse_bookmarks_export(html_file): yield info -def parse_pinboard_rss_feed(rss_file): +def parse_pinboard_rss_export(rss_file): """Parse Pinboard RSS feed files into links""" rss_file.seek(0) @@ -269,7 +278,7 @@ def parse_pinboard_rss_feed(rss_file): info['type'] = get_link_type(info) yield info -def parse_medium_rss_feed(rss_file): +def parse_medium_rss_export(rss_file): """Parse Medium RSS feed files into links""" rss_file.seek(0) @@ -295,7 +304,7 @@ def parse_medium_rss_feed(rss_file): yield info -def parse_plain_text(text_file): +def parse_plain_text_export(text_file): """Parse raw links from each line in a text file""" text_file.seek(0)