def get_history(self) -> dict:
    """
    Build the per-extractor history mapping from the related ArchiveResults.

    Every row returned by ``self.archiveresult_set.all()`` is turned into a
    plain dict and appended to the list stored under that row's
    ``extractor`` value, in the order the rows are yielded.

    Returns:
        A regular ``dict`` mapping each extractor to a list of entry dicts
        with the keys ``cmd``, ``cmd_version``, ``end_ts``, ``start_ts``,
        ``pwd``, ``output``, ``schema`` and ``status``. It is empty when
        there are no related results.
    """
    grouped = {}
    for result in self.archiveresult_set.all():
        # Fetch (or create) the list for this extractor before building the entry.
        bucket = grouped.setdefault(result.extractor, [])
        entry = {
            "cmd": result.cmd,
            "cmd_version": result.cmd_version,
            # Timestamps are stored as ISO 8601 strings rather than datetime objects.
            "end_ts": result.end_ts.isoformat(),
            "start_ts": result.start_ts.isoformat(),
            "pwd": result.pwd,
            "output": result.output,
            "schema": "ArchiveResult",
            "status": result.status,
        }
        bucket.append(entry)
    return grouped
+ # If that makes more sense, it can be easily changed. #log_link_archiving_started(link, out_dir, is_new) stats = {'skipped': 0, 'succeeded': 0, 'failed': 0} @@ -103,8 +104,6 @@ def archive_snapshot(snapshot: Model, overwrite: bool=False, methods: Optional[I result = method_function(snapshot=snapshot, out_dir=out_dir) - details["history"][method_name].append(result) - stats[result.status] += 1 log_archive_method_finished(result) write_search_index(snapshot=snapshot, texts=result.index_texts) @@ -135,7 +134,7 @@ def archive_snapshot(snapshot: Model, overwrite: bool=False, methods: Optional[I except KeyboardInterrupt: try: - write_snapshot_details(snapshot, out_dir=link.link_dir) + write_snapshot_details(snapshot, out_dir=snapshot.snapshot_dir) except: pass raise