diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py index 8d924415..09b56c66 100644 --- a/archivebox/extractors/__init__.py +++ b/archivebox/extractors/__init__.py @@ -114,7 +114,14 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s write_search_index(link=link, texts=result.index_texts) ArchiveResult.objects.create(snapshot=snapshot, extractor=method_name, cmd=result.cmd, cmd_version=result.cmd_version, output=result.output, pwd=result.pwd, start_ts=result.start_ts, end_ts=result.end_ts, status=result.status) - snapshot.save() # bump the updated time + + + # bump the updated time on the main Snapshot here, this is critical + # to be able to cache summaries of the ArchiveResults for a given + # snapshot without having to load all the results from the DB each time. + # (we use {Snapshot.id}-{Snapshot.updated} as the cache key and assume + # ArchiveResults are unchanged as long as the updated timestamp is unchanged) + snapshot.save() else: # print('{black} X {}{reset}'.format(method_name, **ANSI)) stats['skipped'] += 1