From 084cf7ff51aed0350f767b42777439aae52fc423 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Wed, 17 Feb 2021 13:34:46 -0500 Subject: [PATCH] add more explanation about snapshot.save timestamp bump --- archivebox/extractors/__init__.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py index 8d924415..09b56c66 100644 --- a/archivebox/extractors/__init__.py +++ b/archivebox/extractors/__init__.py @@ -114,7 +114,14 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s write_search_index(link=link, texts=result.index_texts) ArchiveResult.objects.create(snapshot=snapshot, extractor=method_name, cmd=result.cmd, cmd_version=result.cmd_version, output=result.output, pwd=result.pwd, start_ts=result.start_ts, end_ts=result.end_ts, status=result.status) - snapshot.save() # bump the updated time + + + # bump the updated time on the main Snapshot here; this is critical + # to be able to cache summaries of the ArchiveResults for a given + # snapshot without having to load all the results from the DB each time. + # (we use {Snapshot.id}-{Snapshot.updated} as the cache key and assume + # ArchiveResults are unchanged as long as the updated timestamp is unchanged) + snapshot.save() else: # print('{black} X {}{reset}'.format(method_name, **ANSI)) stats['skipped'] += 1