1
0
Fork 0
mirror of synced 2024-06-26 18:10:24 +12:00

Add ArchiveResult Manager and sorted indexable filter

This commit is contained in:
JDC 2020-11-23 13:04:38 -05:00 committed by Nick Sweeting
parent 23a9beb4e0
commit 7903db6dfb
2 changed files with 17 additions and 3 deletions

View file

@ -5,10 +5,11 @@ import uuid
from django.db import models, transaction
from django.utils.functional import cached_property
from django.utils.text import slugify
from django.db.models import Case, When, Value, IntegerField
from ..util import parse_date
from ..index.schema import Link
from ..extractors import get_default_archive_methods
from ..extractors import get_default_archive_methods, ARCHIVE_METHODS_INDEXING_PRECEDENCE
EXTRACTORS = [(extractor[0], extractor[0]) for extractor in get_default_archive_methods()]
STATUS_CHOICES = [
@ -91,7 +92,7 @@ class Snapshot(models.Model):
return {
key: getattr(self, key)
if key != 'tags' else self.tags_str()
for key in args
for key in args
}
def as_link(self) -> Link:
@ -100,7 +101,7 @@ class Snapshot(models.Model):
def as_link_with_details(self) -> Link:
    """Return this snapshot's Link after passing it through load_link_details."""
    # Imported at call time instead of at module load; presumably this
    # sidesteps a circular import between the models and index packages
    # -- TODO confirm.
    from ..index import load_link_details

    link = self.as_link()
    return load_link_details(link)
def tags_str(self) -> str:
    """Return the names of this snapshot's tags, ordered by name, joined with commas."""
    ordered_names = self.tags.order_by('name').values_list('name', flat=True)
    return ','.join(ordered_names)
@ -157,7 +158,15 @@ class Snapshot(models.Model):
self.tags.clear()
self.tags.add(*tags_id)
class ArchiveResultManager(models.Manager):
    """Manager that adds an `indexable()` query helper on top of the default manager."""

    def indexable(self, sorted: bool = True):
        """Return succeeded results whose extractor is in ARCHIVE_METHODS_INDEXING_PRECEDENCE.

        Args:
            sorted: When true, each row is annotated with an integer
                `indexing_precedence` taken from the precedence table
                (1000 if no entry matches) and the queryset is ordered by
                that annotation, ascending. The name shadows the builtin
                but is kept because callers may pass it by keyword.

        Returns:
            The filtered queryset, annotated and ordered when `sorted` is true.
        """
        indexable_extractors = [entry[0] for entry in ARCHIVE_METHODS_INDEXING_PRECEDENCE]
        results = self.get_queryset().filter(
            extractor__in=indexable_extractors,
            status='succeeded',
        )
        if not sorted:
            return results

        # One WHEN branch per (extractor, rank) pair in the precedence table.
        rank_cases = [
            When(extractor=extractor_name, then=Value(rank))
            for extractor_name, rank in ARCHIVE_METHODS_INDEXING_PRECEDENCE
        ]
        rank_expression = Case(
            *rank_cases,
            default=Value(1000),
            output_field=IntegerField(),
        )
        annotated = results.annotate(indexing_precedence=rank_expression)
        return annotated.order_by('indexing_precedence')
class ArchiveResult(models.Model):
snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
cmd = models.JSONField()
@ -169,5 +178,7 @@ class ArchiveResult(models.Model):
status = models.CharField(max_length=16, choices=STATUS_CHOICES)
extractor = models.CharField(choices=EXTRACTORS, max_length=32)
objects = ArchiveResultManager()
def __str__(self):
return self.extractor

View file

@ -39,6 +39,7 @@ from .media import should_save_media, save_media
from .archive_org import should_save_archive_dot_org, save_archive_dot_org
from .headers import should_save_headers, save_headers
def get_default_archive_methods():
return [
('title', should_save_title, save_title),
@ -56,6 +57,8 @@ def get_default_archive_methods():
('archive_org', should_save_archive_dot_org, save_archive_dot_org),
]
# (extractor name, precedence) pairs for extractor output that can be indexed.
# ArchiveResultManager.indexable() orders results by this number, ascending.
ARCHIVE_METHODS_INDEXING_PRECEDENCE = [
    ('readability', 1),
    ('singlefile', 2),
    ('dom', 3),
    ('wget', 4),
]
@enforce_types
def ignore_methods(to_ignore: List[str]):
ARCHIVE_METHODS = get_default_archive_methods()