1
0
Fork 0
Mirror of the upstream repository; last synced 2024-06-26 10:00:19 +12:00

add ulid and typeid to Snapshot and ArchiveResult

This commit is contained in:
Nick Sweeting 2024-05-12 04:45:34 -07:00
parent 0529099639
commit 33bc4622a0
No known key found for this signature in database
3 changed files with 85 additions and 4 deletions

View file

@ -2,10 +2,13 @@ __package__ = 'archivebox.core'
import uuid
import ulid
import json
import hashlib
from typeid import TypeID
from pathlib import Path
from typing import Optional, List
from typing import Optional, List, NamedTuple
from importlib import import_module
from django.db import models
@ -37,6 +40,13 @@ except AttributeError:
JSONField = jsonfield.JSONField
# The four string segments that the models below join, in this order,
# and hand to ulid.parse() to build a full ULID.
ULIDParts = NamedTuple('ULIDParts', [
    ('timestamp', str),
    ('url', str),
    ('subtype', str),
    ('randomness', str),
])
class Tag(models.Model):
"""
Based on django-taggit model
@ -99,6 +109,38 @@ class Snapshot(models.Model):
keys = ('url', 'timestamp', 'title', 'tags', 'updated')
@property
def ulid_from_timestamp(self):
    """Return the first 10 characters of a ULID built from ``self.added``."""
    stamped = ulid.from_timestamp(self.added)
    return str(stamped)[:10]
@property
def ulid_from_urlhash(self):
    """Return characters 10 through 17 of a ULID whose randomness is ``self.url_hash``."""
    hashed = ulid.from_randomness(self.url_hash)
    encoded = str(hashed)
    return encoded[10:18]
@property
def ulid_from_type(self):
    """Return the constant two-character subtype segment used for snapshots."""
    subtype_segment = '00'
    return subtype_segment
@property
def ulid_from_randomness(self):
    """Return everything after the first 20 characters of the ULID built from ``self.id``."""
    from_id = ulid.from_uuid(self.id)
    return str(from_id)[20:]
@property
def ulid_tuple(self) -> ULIDParts:
    """Collect this snapshot's four ULID segments into a ``ULIDParts``."""
    return ULIDParts(
        timestamp=self.ulid_from_timestamp,
        url=self.ulid_from_urlhash,
        subtype=self.ulid_from_type,
        randomness=self.ulid_from_randomness,
    )
@property
def ulid(self):
    """Join the segments of ``self.ulid_tuple`` and parse the result as a ULID."""
    encoded = ''.join(self.ulid_tuple)
    return ulid.parse(encoded)
@property
def uuid(self):
    """Return the ``uuid`` attribute of ``self.ulid``."""
    snapshot_ulid = self.ulid
    return snapshot_ulid.uuid
@property
def typeid(self):
    """Build a TypeID with the ``snapshot`` prefix from the UUID of ``self.ulid``."""
    derived_uuid = self.ulid.uuid
    return TypeID.from_uuid(prefix='snapshot', suffix=derived_uuid)
def __repr__(self) -> str:
    """Render as ``[timestamp] url (title)``, with url and title cut to 64 characters.

    A falsy title is shown as ``-``.
    """
    shown_title = self.title
    if not shown_title:
        shown_title = '-'
    return '[{}] {} ({})'.format(self.timestamp, self.url[:64], shown_title[:64])
@ -163,7 +205,10 @@ class Snapshot(models.Model):
@cached_property
def url_hash(self):
    """Return the first 16 hex characters of the SHA-256 digest of ``self.url``.

    The URL is encoded as UTF-8 before hashing. This replaces the earlier
    ``hashurl(self.url)`` value: with that ``return`` left in front, the
    SHA-256 code below it could never run. ``ulid_from_urlhash`` passes this
    value to ``ulid.from_randomness``.
    """
    digest = hashlib.sha256(self.url.encode('utf-8')).hexdigest()
    return digest[:16]
@cached_property
def base_url(self):
@ -271,7 +316,7 @@ class ArchiveResult(models.Model):
EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
uuid = models.UUIDField(default=uuid.uuid4, editable=False)
uuid = models.UUIDField(default=uuid.uuid4, editable=True)
snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
extractor = models.CharField(choices=EXTRACTOR_CHOICES, max_length=32)
@ -292,6 +337,40 @@ class ArchiveResult(models.Model):
def snapshot_dir(self):
    """Return the parent snapshot's ``link_dir`` wrapped in a ``Path``."""
    link_dir = self.snapshot.link_dir
    return Path(link_dir)
@property
def ulid_from_timestamp(self):
    """Return the parent snapshot's ``ulid_from_timestamp`` segment unchanged."""
    parent = self.snapshot
    return parent.ulid_from_timestamp
@property
def ulid_from_urlhash(self):
    """Return the parent snapshot's ``ulid_from_urlhash`` segment unchanged."""
    parent = self.snapshot
    return parent.ulid_from_urlhash
@property
def ulid_from_snapshot(self):
    """Return the first 18 characters of the parent snapshot's ULID string."""
    snapshot_ulid = str(self.snapshot.ulid)
    return snapshot_ulid[:18]
@property
def ulid_from_type(self):
    """Return the first two hex characters of the SHA-256 digest of ``self.extractor``.

    The extractor name is encoded as UTF-8 before hashing.
    """
    hasher = hashlib.new('sha256')
    hasher.update(self.extractor.encode('utf-8'))
    return hasher.hexdigest()[:2]
@property
def ulid_from_randomness(self):
    """Return everything after the first 20 characters of the ULID built from ``self.uuid``."""
    from_uuid = ulid.from_uuid(self.uuid)
    return str(from_uuid)[20:]
@property
def ulid_tuple(self) -> ULIDParts:
    """Collect this result's four ULID segments into a ``ULIDParts``."""
    return ULIDParts(
        timestamp=self.ulid_from_timestamp,
        url=self.ulid_from_urlhash,
        subtype=self.ulid_from_type,
        randomness=self.ulid_from_randomness,
    )
@property
def ulid(self):
    """Join the segments of ``self.ulid_tuple`` and parse the result as a ULID."""
    joined_parts = ''.join(self.ulid_tuple)
    parsed = ulid.parse(joined_parts)
    # TODO: migrate self.uuid to match this new uuid
    # (i.e. self.uuid = parsed.uuid)
    return parsed
@property
def typeid(self):
    """Build a TypeID with the ``result`` prefix from the UUID of ``self.ulid``."""
    derived_uuid = self.ulid.uuid
    return TypeID.from_uuid(prefix='result', suffix=derived_uuid)
@property
def extractor_module(self):

View file

@ -263,7 +263,7 @@ CACHES = {
'default': {'BACKEND': 'django.core.cache.backends.db.DatabaseCache', 'LOCATION': 'cache'},
'dummy': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'},
'locmem': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'},
# 'filebased': {"BACKEND": "django.core.cache.backends.filebased.FileBasedCache", "LOCATION": CACHE_DIR / 'cache_filebased'},
'filebased': {"BACKEND": "django.core.cache.backends.filebased.FileBasedCache", "LOCATION": CACHE_DIR / 'cache_filebased'},
}
EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'

View file

@ -37,6 +37,8 @@ dependencies = [
# - See Github issues for more...
"django-signal-webhooks>=0.3.0",
"django-admin-data-views>=0.3.1",
"ulid-py>=1.1.0",
"typeid-python>=0.3.0",
]
homepage = "https://github.com/ArchiveBox/ArchiveBox"