From 309a87e8fecdcd291d64d66add47c46d766dd9e0 Mon Sep 17 00:00:00 2001
From: Cristian
Date: Wed, 4 Nov 2020 07:28:02 -0500
Subject: [PATCH] feat: Add extractor field to the database

---
Reviewer notes (this region is dropped by `git am`; it is not part of the
commit message or of the applied change):

* models.py: adds a module-level EXTRACTORS list of (name, name) pairs built
  from the first element of every entry returned by
  get_default_archive_methods(), and a new ArchiveResult.extractor CharField
  (max_length=20, blank=False) that uses it as `choices`.
* 0007_archiveresult.py: adds the matching `extractor` column to the field
  list and bumps the "Generated by Django" timestamp. The 13 choices are
  written out literally here, while models.py computes them at import time.
  NOTE(review): the two can drift apart if get_default_archive_methods()
  changes - confirm that is acceptable.
* NOTE(review): this edits an existing migration in place instead of adding
  a new one. A database that has already applied 0007 would presumably not
  receive the new column - confirm 0007 has not shipped.
* NOTE(review): the new field has no default and `blank=False` restates what
  looks like Django's default - confirm both are intended.
* models.py still ends without a trailing newline after this change.

 archivebox/core/migrations/0007_archiveresult.py | 3 ++-
 archivebox/core/models.py                        | 6 +++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/archivebox/core/migrations/0007_archiveresult.py b/archivebox/core/migrations/0007_archiveresult.py
index 56f4143e..1d0da342 100644
--- a/archivebox/core/migrations/0007_archiveresult.py
+++ b/archivebox/core/migrations/0007_archiveresult.py
@@ -1,4 +1,4 @@
-# Generated by Django 3.0.8 on 2020-11-03 14:52
+# Generated by Django 3.0.8 on 2020-11-04 12:25
 
 from django.db import migrations, models
 import django.db.models.deletion
@@ -21,6 +21,7 @@ class Migration(migrations.Migration):
                 ('output', models.CharField(default='', max_length=500)),
                 ('start_ts', models.DateTimeField()),
                 ('end_ts', models.DateTimeField()),
+                ('extractor', models.CharField(choices=[('title', 'title'), ('favicon', 'favicon'), ('wget', 'wget'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('readability', 'readability'), ('mercury', 'mercury'), ('git', 'git'), ('media', 'media'), ('headers', 'headers'), ('archive_org', 'archive_org')], max_length=20)),
                 ('snapshot', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='core.Snapshot')),
             ],
         ),
diff --git a/archivebox/core/models.py b/archivebox/core/models.py
index 53c43e29..944d8612 100644
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -8,6 +8,9 @@ from django.utils.text import slugify
 
 from ..util import parse_date
 from ..index.schema import Link
+from ..extractors import get_default_archive_methods
+
+EXTRACTORS = [(extractor[0], extractor[0]) for extractor in get_default_archive_methods()]
 
 
 class Tag(models.Model):
@@ -157,4 +160,5 @@ class ArchiveResult(models.Model):
     cmd_version = models.CharField(max_length=20, default="")
     output = models.CharField(max_length=500, default="")
     start_ts = models.DateTimeField()
-    end_ts = models.DateTimeField()
\ No newline at end of file
+    end_ts = models.DateTimeField()
+    extractor = models.CharField(choices=EXTRACTORS, blank=False, max_length=20)
\ No newline at end of file