From f9a7c530b97bdae3c82aa2c392f3238f305e70a9 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 19 Mar 2019 18:44:22 -0400 Subject: [PATCH] fix pinboard RSS parser --- archivebox/parse.py | 8 ++++---- archivebox/util.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/archivebox/parse.py b/archivebox/parse.py index bd311288..69a37014 100644 --- a/archivebox/parse.py +++ b/archivebox/parse.py @@ -228,9 +228,9 @@ def parse_pinboard_rss_export(rss_file): items = root.findall("{http://purl.org/rss/1.0/}item") for item in items: url = item.find("{http://purl.org/rss/1.0/}link").text - tags = item.find("{http://purl.org/dc/elements/1.1/}subject").text - title = item.find("{http://purl.org/rss/1.0/}title").text.strip() - ts_str = item.find("{http://purl.org/dc/elements/1.1/}date").text + tags = item.find("{http://purl.org/dc/elements/1.1/}subject").text if item.find("{http://purl.org/dc/elements/1.1/}subject") else None + title = item.find("{http://purl.org/rss/1.0/}title").text.strip() if item.find("{http://purl.org/rss/1.0/}title").text.strip() else None + ts_str = item.find("{http://purl.org/dc/elements/1.1/}date").text if item.find("{http://purl.org/dc/elements/1.1/}date").text else None # = 🌈🌈🌈🌈 # = 🌈🌈🌈🌈 # = 🏆🏆🏆🏆 @@ -243,7 +243,7 @@ def parse_pinboard_rss_export(rss_file): info = { 'url': url, 'timestamp': str(time.timestamp()), - 'tags': tags, + 'tags': tags or '', 'title': title or None, 'sources': [rss_file.name], } diff --git a/archivebox/util.py b/archivebox/util.py index 23752e8c..c0b8a50b 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -413,7 +413,7 @@ def derived_link_info(link): 'is_archived': os.path.exists(os.path.join( ARCHIVE_DIR, link['timestamp'], - wget_output_path(link) or domain(url) + domain(url), )), 'num_outputs': len([entry for entry in link['latest'].values() if entry]) if 'latest' in link else 0, }