diff --git a/archivebox/core/models.py b/archivebox/core/models.py index fe2d05ab..dca6941f 100644 --- a/archivebox/core/models.py +++ b/archivebox/core/models.py @@ -60,6 +60,7 @@ class Tag(models.Model): else: return super().save(*args, **kwargs) + class Snapshot(models.Model): id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) @@ -158,6 +159,7 @@ class Snapshot(models.Model): self.tags.clear() self.tags.add(*tags_id) + class ArchiveResultManager(models.Manager): def indexable(self, sorted: bool = True): INDEXABLE_METHODS = [ r[0] for r in ARCHIVE_METHODS_INDEXING_PRECEDENCE ] @@ -167,6 +169,8 @@ class ArchiveResultManager(models.Manager): precedence = [ When(extractor=method, then=Value(precedence)) for method, precedence in ARCHIVE_METHODS_INDEXING_PRECEDENCE ] qs = qs.annotate(indexing_precedence=Case(*precedence, default=Value(1000),output_field=IntegerField())).order_by('indexing_precedence') return qs + + class ArchiveResult(models.Model): snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE) cmd = models.JSONField() diff --git a/archivebox/logging_util.py b/archivebox/logging_util.py index bc65e276..7bce3313 100644 --- a/archivebox/logging_util.py +++ b/archivebox/logging_util.py @@ -443,7 +443,7 @@ def log_shell_welcome_msg(): from .cli import list_subcommands print('{green}# ArchiveBox Imports{reset}'.format(**ANSI)) - print('{green}from archivebox.core.models import Snapshot, User{reset}'.format(**ANSI)) + print('{green}from core.models import Snapshot, User{reset}'.format(**ANSI)) print('{green}from archivebox import *\n {}{reset}'.format("\n ".join(list_subcommands().keys()), **ANSI)) print() print('[i] Welcome to the ArchiveBox Shell!') diff --git a/archivebox/search/__init__.py b/archivebox/search/__init__.py index ebeebcd0..9efe838b 100644 --- a/archivebox/search/__init__.py +++ b/archivebox/search/__init__.py @@ -95,7 +95,8 @@ def index_links(links: Union[List[Link],None], out_dir: Path=OUTPUT_DIR): from core.models import Snapshot, ArchiveResult for link in links: - if snap := Snapshot.objects.filter(url=link.url).first(): + snap = Snapshot.objects.filter(url=link.url).first() + if snap: results = ArchiveResult.objects.indexable().filter(snapshot=snap) log_index_started(link.url) try: diff --git a/archivebox/search/backends/ripgrep.py b/archivebox/search/backends/ripgrep.py index 07292e37..ff02008d 100644 --- a/archivebox/search/backends/ripgrep.py +++ b/archivebox/search/backends/ripgrep.py @@ -38,7 +38,8 @@ def search(text: str) -> List[str]: file_paths = [p.decode() for p in rg.stdout.splitlines()] timestamps = set() for path in file_paths: - if ts := ts_regex.findall(path): + ts = ts_regex.findall(path) + if ts: timestamps.add(ts[0]) snap_ids = [str(id) for id in Snapshot.objects.filter(timestamp__in=timestamps).values_list('pk', flat=True)]