From 13c3650637445a3fc91874e387a8207514fa6d53 Mon Sep 17 00:00:00 2001
From: Cristian
Date: Tue, 19 Jan 2021 11:24:41 -0500
Subject: [PATCH] fix: Update command was failing

---
 archivebox/main.py            | 22 +++++++++++-----------
 archivebox/search/__init__.py | 12 ++++++------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/archivebox/main.py b/archivebox/main.py
index 01373a08..bfe59640 100644
--- a/archivebox/main.py
+++ b/archivebox/main.py
@@ -114,7 +114,7 @@ from .logging_util import (
     printable_dependency_version,
 )
 
-from .search import flush_search_index, index_links
+from .search import flush_search_index, index_snapshots
 
 ALLOWED_IN_OUTPUT_DIR = {
     'lost+found',
@@ -700,7 +700,7 @@ def update(resume: Optional[float]=None,
     check_data_folder(out_dir=out_dir)
     check_dependencies()
 
-    new_links: List[Snapshot] = [] # TODO: Remove input argument: only_new
+    new_snapshots: List[Snapshot] = [] # TODO: Remove input argument: only_new
 
     extractors = extractors.split(",") if extractors else []
 
@@ -717,25 +717,25 @@ def update(resume: Optional[float]=None,
         status=status,
         out_dir=out_dir,
     )
-    all_links = [link for link in matching_folders.values() if link]
+    all_snapshots = [snapshot for snapshot in matching_folders.values()]
 
     if index_only:
         for snapshot in all_snapshots:
             write_snapshot_details(snapshot, out_dir=out_dir, skip_sql_index=True)
-        index_links(all_links, out_dir=out_dir)
-        return all_links
+        index_snapshots(all_snapshots, out_dir=out_dir)
+        return all_snapshots
 
     # Step 2: Run the archive methods for each link
-    to_archive = new_links if only_new else all_links
+    to_archive = new_snapshots if only_new else all_snapshots
     if resume:
         to_archive = [
-            link for link in to_archive
-            if link.timestamp >= str(resume)
+            snapshot for snapshot in to_archive
+            if snapshot.timestamp >= str(resume)
         ]
         if not to_archive:
             stderr('')
             stderr(f'[√] Nothing found to resume after {resume}', color='green')
-            return all_links
+            return all_snapshots
 
     archive_kwargs = {
         "out_dir": out_dir,
@@ -746,8 +746,8 @@ def update(resume: Optional[float]=None,
     archive_snapshots(to_archive, overwrite=overwrite, **archive_kwargs)
 
     # Step 4: Re-write links index with updated titles, icons, and resources
-    all_links = load_main_index(out_dir=out_dir)
-    return all_links
+    all_snapshots = load_main_index(out_dir=out_dir)
+    return all_snapshots
 
 @enforce_types
 def list_all(filter_patterns_str: Optional[str]=None,
diff --git a/archivebox/search/__init__.py b/archivebox/search/__init__.py
index d958f324..2f5cc1ad 100644
--- a/archivebox/search/__init__.py
+++ b/archivebox/search/__init__.py
@@ -83,17 +83,17 @@ def flush_search_index(snapshots: QuerySet):
     )
 
 @enforce_types
-def index_links(links: Union[List[Link],None], out_dir: Path=OUTPUT_DIR):
-    if not links:
+def index_snapshots(snapshots: Union[List[Model],None], out_dir: Path=OUTPUT_DIR):
+    if not snapshots:
         return
 
     from core.models import Snapshot, ArchiveResult
 
-    for link in links:
-        snap = Snapshot.objects.filter(url=link.url).first()
+    for snapshot in snapshots:
+        snap = Snapshot.objects.filter(url=snapshot.url).first()
         if snap:
             results = ArchiveResult.objects.indexable().filter(snapshot=snap)
-            log_index_started(link.url)
+            log_index_started(snapshot.url)
             try:
                 texts = get_indexable_content(results)
             except Exception as err:
@@ -103,4 +103,4 @@ def index_links(links: Union[List[Link],None], out_dir: Path=OUTPUT_DIR):
                 color='red',
             )
         else:
-            write_search_index(link, texts, out_dir=out_dir)
+            write_search_index(snapshot, texts, out_dir=out_dir)
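
Illustrative usage, not part of the patch: after this rename, the search
indexer takes Snapshot model instances rather than Link objects, matching
the new signature index_snapshots(snapshots, out_dir=OUTPUT_DIR) shown
above. A minimal sketch, assuming a configured ArchiveBox data directory
with Django already set up:

    # Re-index every snapshot currently in the SQL index. For each object,
    # index_snapshots() looks the Snapshot up by its .url, collects any
    # indexable ArchiveResult output, and writes it to the search backend.
    from core.models import Snapshot
    from archivebox.search import index_snapshots

    index_snapshots(list(Snapshot.objects.all()))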