diff --git a/archivebox/core/models.py b/archivebox/core/models.py index 690f2831..aa5dc951 100644 --- a/archivebox/core/models.py +++ b/archivebox/core/models.py @@ -79,7 +79,7 @@ class Snapshot(models.Model): title = models.CharField(max_length=128, null=True, blank=True, db_index=True) added = models.DateTimeField(auto_now_add=True, db_index=True) - updated = models.DateTimeField(null=True, blank=True, db_index=True) + updated = models.DateTimeField(auto_now=True, blank=True, null=True, db_index=True) tags = models.ManyToManyField(Tag) keys = ('url', 'timestamp', 'title', 'tags', 'updated') @@ -205,12 +205,15 @@ class ArchiveResultManager(models.Manager): class ArchiveResult(models.Model): + id = models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID') + uuid = models.UUIDField(default=uuid.uuid4, editable=False) + snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE) cmd = JSONField() pwd = models.CharField(max_length=256) - cmd_version = models.CharField(max_length=32, default=None, null=True, blank=True) - output = models.CharField(max_length=512) - start_ts = models.DateTimeField() + cmd_version = models.CharField(max_length=128, default=None, null=True, blank=True) + output = models.CharField(max_length=1024) + start_ts = models.DateTimeField(db_index=True) end_ts = models.DateTimeField() status = models.CharField(max_length=16, choices=STATUS_CHOICES) extractor = models.CharField(choices=EXTRACTORS, max_length=32) diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py index df43d7b7..889d74b1 100644 --- a/archivebox/index/schema.py +++ b/archivebox/index/schema.py @@ -178,6 +178,7 @@ class Link: raise def _asdict(self, extended=False): + from core.models import Snapshot info = { 'schema': 'Link', 'url': self.url, @@ -190,6 +191,7 @@ class Link: } if extended: info.update({ + 'snapshot_id': self.snapshot_id, 'link_dir': self.link_dir, 'archive_path': self.archive_path, @@ -258,6 +260,11 @@ class Link: return to_csv(self, cols=cols or self.field_names(), separator=separator, ljust=ljust) + @cached_property + def snapshot_id(self): + from core.models import Snapshot + return str(Snapshot.objects.only('id').get(url=self.url).id) + @classmethod def field_names(cls): return [f.name for f in fields(cls)]