From 56d382235fd456d5c3819c112a6c903cc7daca12 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Wed, 6 Feb 2019 22:06:21 -0800 Subject: [PATCH] better progress output --- archivebox/archive.py | 29 ++++++++++++----------------- archivebox/archive_methods.py | 4 ++-- archivebox/index.py | 12 +++++++----- archivebox/parse.py | 7 ++++--- archivebox/util.py | 5 +++-- 5 files changed, 28 insertions(+), 29 deletions(-) diff --git a/archivebox/archive.py b/archivebox/archive.py index b3483284..c17870ff 100755 --- a/archivebox/archive.py +++ b/archivebox/archive.py @@ -25,8 +25,10 @@ from config import ( ONLY_NEW, OUTPUT_PERMISSIONS, OUTPUT_DIR, + REPO_DIR, ANSI, TIMEOUT, + SHOW_PROGRESS, GIT_SHA, ) from util import ( @@ -69,21 +71,13 @@ def merge_links(archive_path=OUTPUT_DIR, import_path=None, only_new=False): all_links = validate_links(existing_links + all_links) num_new_links = len(all_links) - len(existing_links) - if num_new_links and not only_new: - print('{green}[+] [{}] Adding {} new links to index from {} ({} format){reset}'.format( - datetime.now().strftime('%Y-%m-%d %H:%M:%S'), - num_new_links, - pretty_path(import_path), - parser_name, - **ANSI, - )) - # else: - # print('[*] [{}] No new links added to {}/index.json{}'.format( - # datetime.now().strftime('%Y-%m-%d %H:%M:%S'), - # archive_path, - # ' from {}'.format(import_path) if import_path else '', - # **ANSI, - # )) + if SHOW_PROGRESS: + print() + print(' > Adding {} new links to index from {} (parsed as {} format)'.format( + num_new_links, + pretty_path(import_path), + parser_name, + )) if only_new: return new_links(all_links, existing_links) @@ -102,7 +96,7 @@ def update_archive(archive_path, links, source=None, resume=None, append=True): **ANSI, )) else: - print('{green}[▶] [{}] Downloading content for {} pages in archive...{reset}'.format( + print('{green}[▶] [{}] Updating content for {} pages in archive...{reset}'.format( datetime.now().strftime('%Y-%m-%d %H:%M:%S'), len(links), **ANSI, @@ -119,7 +113,7 @@ def update_archive(archive_path, links, source=None, resume=None, append=True): else: duration = '{0:.2f} sec'.format(seconds, 2) - print('{}[√] [{}] Update of {} links complete ({}){}'.format( + print('{}[√] [{}] Update of {} pages complete ({}){}'.format( ANSI['green'], datetime.now().strftime('%Y-%m-%d %H:%M:%S'), len(links), @@ -129,6 +123,7 @@ def update_archive(archive_path, links, source=None, resume=None, append=True): print(' - {} entries skipped'.format(_RESULTS_TOTALS['skipped'])) print(' - {} entries updated'.format(_RESULTS_TOTALS['succeded'])) print(' - {} errors'.format(_RESULTS_TOTALS['failed'])) + print(' To view your archive, open: {}/index.html'.format(OUTPUT_DIR.replace(REPO_DIR + '/', ''))) if __name__ == '__main__': diff --git a/archivebox/archive_methods.py b/archivebox/archive_methods.py index 14583545..9e00070f 100644 --- a/archivebox/archive_methods.py +++ b/archivebox/archive_methods.py @@ -134,8 +134,8 @@ def log_link_archive(link_dir, link, update_existing): )) print(' > {}{}'.format(pretty_path(link_dir), '' if update_existing else ' (new)')) - if link['type']: - print(' i {}'.format(link['type'])) + # if link['type']: + # print(' i {}'.format(link['type'])) diff --git a/archivebox/index.py b/archivebox/index.py index d8cf5b67..cc3ccf20 100644 --- a/archivebox/index.py +++ b/archivebox/index.py @@ -28,14 +28,16 @@ def write_links_index(out_dir, links): if not os.path.exists(out_dir): os.makedirs(out_dir) - write_json_links_index(out_dir, links) - write_html_links_index(out_dir, links) - - print('{green}[√] [{}] Updated main index files:{reset}'.format( + print('{green}[*] [{}] Updating main index files...{reset}'.format( datetime.now().strftime('%Y-%m-%d %H:%M:%S'), - **ANSI)) + **ANSI, + )) + write_json_links_index(out_dir, links) print(' > {}/index.json'.format(pretty_path(out_dir))) + + write_html_links_index(out_dir, links) print(' > {}/index.html'.format(pretty_path(out_dir))) + def write_json_links_index(out_dir, links): """write the json link index to a given path""" diff --git a/archivebox/parse.py b/archivebox/parse.py index ae38d019..8093672a 100644 --- a/archivebox/parse.py +++ b/archivebox/parse.py @@ -18,6 +18,7 @@ Parsed link schema: { """ import re +import sys import json import urllib from collections import OrderedDict @@ -25,7 +26,7 @@ import xml.etree.ElementTree as etree from datetime import datetime -from config import ANSI +from config import ANSI, SHOW_PROGRESS from util import ( domain, base_url, @@ -60,6 +61,8 @@ def parse_links(path): path.rsplit('/', 1)[-1], **ANSI, )) + if SHOW_PROGRESS: + sys.stdout.write(' ') for parser_name, parser_func in get_parsers(file).items(): # otherwise try all parsers until one works @@ -72,8 +75,6 @@ def parse_links(path): # print('[!] Parser {} failed: {} {}'.format(parser_name, err.__class__.__name__, err)) pass - print() - return links, parser_name diff --git a/archivebox/util.py b/archivebox/util.py index 6ddb68ab..6a91dd76 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -233,8 +233,9 @@ def fetch_page_title(url, default=True): default = url try: - sys.stdout.write('.') - sys.stdout.flush() + if SHOW_PROGRESS: + sys.stdout.write('.') + sys.stdout.flush() html_content = urllib.request.urlopen(url, timeout=10).read().decode('utf-8') match = re.search('(.*?)', html_content) return match.group(1) if match else default or None