1
0
Fork 0
Mirror of upstream repository, synced 2024-07-01 12:30:24 +12:00

tweak wording of parser cli output

This commit is contained in:
Nick Sweeting 2019-02-04 22:07:48 -08:00
parent ca8f57ef5c
commit ee93807a0a
2 changed files with 26 additions and 18 deletions

View file

@@ -67,14 +67,13 @@ def merge_links(archive_path=OUTPUT_DIR, import_path=None, only_new=False):
if archive_path: if archive_path:
existing_links = parse_json_links_index(archive_path) existing_links = parse_json_links_index(archive_path)
all_links = validate_links(existing_links + all_links) all_links = validate_links(existing_links + all_links)
num_new_links = len(all_links) - len(existing_links) num_new_links = len(all_links) - len(existing_links)
if num_new_links and not only_new: if num_new_links and not only_new:
print('[{green}+{reset}] [{}] Adding {} new links from {} to {}/index.json (detected {} format)'.format( print('{green}[+] [{}] Adding {} new links to index from {} ({} format){reset}'.format(
datetime.now().strftime('%Y-%m-%d %H:%M:%S'), datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
num_new_links, num_new_links,
pretty_path(import_path), pretty_path(import_path),
pretty_path(archive_path),
parser_name, parser_name,
**ANSI, **ANSI,
)) ))
@@ -103,7 +102,7 @@ def update_archive(archive_path, links, source=None, resume=None, append=True):
**ANSI, **ANSI,
)) ))
else: else:
print('{green}[▶] [{}] Updating files for {} links in archive...{reset}'.format( print('{green}[▶] [{}] Downloading content for {} pages in archive...{reset}'.format(
datetime.now().strftime('%Y-%m-%d %H:%M:%S'), datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
len(links), len(links),
**ANSI, **ANSI,

View file

@@ -25,6 +25,7 @@ import xml.etree.ElementTree as etree
from datetime import datetime from datetime import datetime
from config import ANSI
from util import ( from util import (
domain, domain,
base_url, base_url,
@@ -39,14 +40,14 @@ def get_parsers(file):
"""return all parsers that work on a given file, defaults to all of them""" """return all parsers that work on a given file, defaults to all of them"""
return OrderedDict([ return OrderedDict([
('pocket', parse_pocket_export), ('Pocket HTML', parse_pocket_html_export),
('pinboard', parse_json_export), ('Pinboard JSON', parse_pinboard_json_export),
('bookmarks', parse_bookmarks_export), ('Netscape HTML', parse_netscape_html_export),
('rss', parse_rss_export), ('RSS', parse_rss_export),
('pinboard_rss', parse_pinboard_rss_feed), ('Pinboard RSS', parse_pinboard_rss_export),
('shaarli_rss', parse_shaarli_rss_export), ('Shaarli RSS', parse_shaarli_rss_export),
('medium_rss', parse_medium_rss_feed), ('Medium RSS', parse_medium_rss_export),
('plain_text', parse_plain_text), ('Plain Text', parse_plain_text_export),
]) ])
def parse_links(path): def parse_links(path):
@@ -54,6 +55,12 @@ def parse_links(path):
links = [] links = []
with open(path, 'r', encoding='utf-8') as file: with open(path, 'r', encoding='utf-8') as file:
print('{green}[*] [{}] Parsing new links from output/sources/{} and fetching titles...{reset}'.format(
datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
path.rsplit('/', 1)[-1],
**ANSI,
))
for parser_name, parser_func in get_parsers(file).items(): for parser_name, parser_func in get_parsers(file).items():
# otherwise try all parsers until one works # otherwise try all parsers until one works
try: try:
@@ -64,10 +71,12 @@ def parse_links(path):
# parser not supported on this file # parser not supported on this file
pass pass
print()
return links, parser_name return links, parser_name
def parse_pocket_export(html_file): def parse_pocket_html_export(html_file):
"""Parse Pocket-format bookmarks export files (produced by getpocket.com/export/)""" """Parse Pocket-format bookmarks export files (produced by getpocket.com/export/)"""
html_file.seek(0) html_file.seek(0)
@@ -91,7 +100,7 @@ def parse_pocket_export(html_file):
info['type'] = get_link_type(info) info['type'] = get_link_type(info)
yield info yield info
def parse_json_export(json_file): def parse_pinboard_json_export(json_file):
"""Parse JSON-format bookmarks export files (produced by pinboard.in/export/, or wallabag)""" """Parse JSON-format bookmarks export files (produced by pinboard.in/export/, or wallabag)"""
json_file.seek(0) json_file.seek(0)
json_content = json.load(json_file) json_content = json.load(json_file)
@@ -210,7 +219,7 @@ def parse_shaarli_rss_export(rss_file):
yield info yield info
def parse_bookmarks_export(html_file): def parse_netscape_html_export(html_file):
"""Parse netscape-format bookmarks export files (produced by all browsers)""" """Parse netscape-format bookmarks export files (produced by all browsers)"""
html_file.seek(0) html_file.seek(0)
@@ -237,7 +246,7 @@ def parse_bookmarks_export(html_file):
yield info yield info
def parse_pinboard_rss_feed(rss_file): def parse_pinboard_rss_export(rss_file):
"""Parse Pinboard RSS feed files into links""" """Parse Pinboard RSS feed files into links"""
rss_file.seek(0) rss_file.seek(0)
@@ -269,7 +278,7 @@ def parse_pinboard_rss_feed(rss_file):
info['type'] = get_link_type(info) info['type'] = get_link_type(info)
yield info yield info
def parse_medium_rss_feed(rss_file): def parse_medium_rss_export(rss_file):
"""Parse Medium RSS feed files into links""" """Parse Medium RSS feed files into links"""
rss_file.seek(0) rss_file.seek(0)
@@ -295,7 +304,7 @@ def parse_medium_rss_feed(rss_file):
yield info yield info
def parse_plain_text(text_file): def parse_plain_text_export(text_file):
"""Parse raw links from each line in a text file""" """Parse raw links from each line in a text file"""
text_file.seek(0) text_file.seek(0)