From 0b27f33d2eefcd05f46d60c9aaa15a5eee25e5b3 Mon Sep 17 00:00:00 2001
From: Nick Sweeting
Date: Wed, 24 Apr 2019 04:06:54 -0400
Subject: [PATCH] fix sql link parsing and writing

---
 archivebox/legacy/index.py       | 6 ++++--
 archivebox/legacy/logs.py        | 2 +-
 archivebox/legacy/storage/sql.py | 2 +-
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/archivebox/legacy/index.py b/archivebox/legacy/index.py
index 2ce7bb82..29b355db 100644
--- a/archivebox/legacy/index.py
+++ b/archivebox/legacy/index.py
@@ -245,10 +245,12 @@ def load_main_index(out_dir: str=OUTPUT_DIR, warn: bool=True) -> List[Link]:
     all_links: List[Link] = []
     all_links = list(parse_json_main_index(out_dir))
-    links_from_sql = list(parse_sql_main_index())
+    links_from_sql = list(parse_sql_main_index(out_dir))
 
-    if warn and not set(l.url for l in all_links) == set(l['url'] for l in links_from_sql):
+    if warn and not set(l.url for l in all_links) == set(l.url for l in links_from_sql):
         stderr('{red}[!] Warning: SQL index does not match JSON index!{reset}'.format(**ANSI))
+        stderr('    To repair the index and re-import any orphaned links run:')
+        stderr('        archivebox init')
 
     return all_links
 
 
diff --git a/archivebox/legacy/logs.py b/archivebox/legacy/logs.py
index 8cb13629..e0b34301 100644
--- a/archivebox/legacy/logs.py
+++ b/archivebox/legacy/logs.py
@@ -60,7 +60,7 @@ def log_indexing_process_started(num_links: int):
     start_ts = datetime.now()
     _LAST_RUN_STATS.index_start_ts = start_ts
     print()
-    print('{green}[*] [{}] Updating {} links in main index...{reset}'.format(
+    print('{green}[*] [{}] Writing {} links to main index...{reset}'.format(
         start_ts.strftime('%Y-%m-%d %H:%M:%S'),
         num_links,
         **ANSI,
diff --git a/archivebox/legacy/storage/sql.py b/archivebox/legacy/storage/sql.py
index be6bfbe2..302208ae 100644
--- a/archivebox/legacy/storage/sql.py
+++ b/archivebox/legacy/storage/sql.py
@@ -15,7 +15,7 @@ def parse_sql_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
     from core.models import Page
 
     return (
-        page.as_json(*Page.keys)
+        Link.from_json(page.as_json(*Page.keys))
         for page in Page.objects.all()
     )
 
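
Note (not part of the patch): the sql.py hunk makes parse_sql_main_index yield Link objects instead of plain dicts, which is why the index.py hunk can compare l.url on both sides of the warning check. Below is a minimal, self-contained Python sketch of that effect; the simplified Link class, parse_sql_rows helper, and in-memory rows are stand-ins assumed for illustration, not archivebox's real models.

# Minimal sketch, not part of the patch: the Link class and rows below are
# simplified stand-ins for archivebox's real Link/Page types.
from dataclasses import dataclass
from typing import Iterator, List


@dataclass(frozen=True)
class Link:
    url: str
    timestamp: str

    @classmethod
    def from_json(cls, info: dict) -> 'Link':
        return cls(url=info['url'], timestamp=info['timestamp'])


def parse_sql_rows(rows: List[dict]) -> Iterator[Link]:
    # Before the fix the SQL parser yielded plain dicts (so callers needed
    # l['url']); after the fix it yields Link objects, same as the JSON parser.
    return (Link.from_json(row) for row in rows)


json_links = [Link('https://example.com', '1556092014')]
sql_links = list(parse_sql_rows([{'url': 'https://example.com', 'timestamp': '1556092014'}]))

# Both sides now expose .url, so the consistency check in load_main_index
# can compare the two indexes uniformly.
assert {l.url for l in json_links} == {l.url for l in sql_links}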