diff --git a/archivebox/api/migrations/0003_outboundwebhook_apitoken_abid_apitoken_uuid_and_more.py b/archivebox/api/migrations/0003_outboundwebhook_apitoken_abid_apitoken_uuid_and_more.py
new file mode 100644
index 00000000..5674406a
--- /dev/null
+++ b/archivebox/api/migrations/0003_outboundwebhook_apitoken_abid_apitoken_uuid_and_more.py
@@ -0,0 +1,60 @@
+# Generated by Django 5.0.6 on 2024-05-13 10:58
+
+import charidfield.fields
+import signal_webhooks.fields
+import signal_webhooks.utils
+import uuid
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('api', '0002_alter_apitoken_options'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='OutboundWebhook',
+            fields=[
+                ('name', models.CharField(db_index=True, help_text='Give your webhook a descriptive name (e.g. Notify ACME Slack channel of any new ArchiveResults).', max_length=255, unique=True, verbose_name='name')),
+                ('signal', models.CharField(choices=[('CREATE', 'Create'), ('UPDATE', 'Update'), ('DELETE', 'Delete'), ('M2M', 'M2M changed'), ('CREATE_OR_UPDATE', 'Create or Update'), ('CREATE_OR_DELETE', 'Create or Delete'), ('CREATE_OR_M2M', 'Create or M2M changed'), ('UPDATE_OR_DELETE', 'Update or Delete'), ('UPDATE_OR_M2M', 'Update or M2M changed'), ('DELETE_OR_M2M', 'Delete or M2M changed'), ('CREATE_UPDATE_OR_DELETE', 'Create, Update or Delete'), ('CREATE_UPDATE_OR_M2M', 'Create, Update or M2M changed'), ('CREATE_DELETE_OR_M2M', 'Create, Delete or M2M changed'), ('UPDATE_DELETE_OR_M2M', 'Update, Delete or M2M changed'), ('CREATE_UPDATE_DELETE_OR_M2M', 'Create, Update or Delete, or M2M changed')], help_text='The type of event the webhook should fire for (e.g. Create, Update, Delete).', max_length=255, verbose_name='signal')),
+                ('ref', models.CharField(db_index=True, help_text='Dot import notation of the model the webhook should fire for (e.g. core.models.Snapshot or core.models.ArchiveResult).', max_length=1023, validators=[signal_webhooks.utils.model_from_reference], verbose_name='referenced model')),
+                ('endpoint', models.URLField(help_text='External URL to POST the webhook notification to (e.g. https://someapp.example.com/webhook/some-webhook-receiver).', max_length=2047, verbose_name='endpoint')),
+                ('headers', models.JSONField(blank=True, default=dict, help_text='Headers to send with the webhook request.', validators=[signal_webhooks.utils.is_dict], verbose_name='headers')),
+                ('auth_token', signal_webhooks.fields.TokenField(blank=True, default='', help_text='Authentication token to use in an Authorization header.', max_length=8000, validators=[signal_webhooks.utils.decode_cipher_key], verbose_name='authentication token')),
+                ('enabled', models.BooleanField(default=True, help_text='Is this webhook enabled?', verbose_name='enabled')),
+                ('keep_last_response', models.BooleanField(default=False, help_text='Should the webhook keep a log of the latest response it got?', verbose_name='keep last response')),
+                ('created', models.DateTimeField(auto_now_add=True, help_text='When the webhook was created.', verbose_name='created')),
+                ('updated', models.DateTimeField(auto_now=True, help_text='When the webhook was last updated.', verbose_name='updated')),
+                ('last_response', models.CharField(blank=True, default='', help_text='Latest response to this webhook.', max_length=8000, verbose_name='last response')),
+                ('last_success', models.DateTimeField(default=None, help_text='When the webhook last succeeded.', null=True, verbose_name='last success')),
+                ('last_failure', models.DateTimeField(default=None, help_text='When the webhook last failed.', null=True, verbose_name='last failure')),
+                ('uuid', models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False)),
+                ('abid', charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='whk', unique=True)),
+            ],
+            options={
+                'verbose_name': 'API Outbound Webhook',
+                'abstract': False,
+            },
+        ),
+        migrations.AddField(
+            model_name='apitoken',
+            name='abid',
+            field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='apt', unique=True),
+        ),
+        migrations.AddField(
+            model_name='apitoken',
+            name='uuid',
+            field=models.UUIDField(blank=True, null=True, unique=True),
+        ),
+        migrations.AlterField(
+            model_name='apitoken',
+            name='id',
+            field=models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False),
+        ),
+        migrations.AddConstraint(
+            model_name='outboundwebhook',
+            constraint=models.UniqueConstraint(fields=('ref', 'endpoint'), name='prevent_duplicate_hooks_api_outboundwebhook'),
+        ),
+    ]
diff --git a/archivebox/core/migrations/0023_alter_archiveresult_options_archiveresult_abid_and_more.py b/archivebox/core/migrations/0023_alter_archiveresult_options_archiveresult_abid_and_more.py
new file mode 100644
index 00000000..39d3d570
--- /dev/null
+++ b/archivebox/core/migrations/0023_alter_archiveresult_options_archiveresult_abid_and_more.py
@@ -0,0 +1,43 @@
+# Generated by Django 5.0.6 on 2024-05-13 10:56
+
+import charidfield.fields
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0022_auto_20231023_2008'),
+    ]
+
+    operations = [
+        migrations.AlterModelOptions(
+            name='archiveresult',
+            options={'verbose_name': 'Result'},
+        ),
+        migrations.AddField(
+            model_name='archiveresult',
+            name='abid',
+            field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='res_', unique=True),
+        ),
+        migrations.AddField(
+            model_name='snapshot',
+            name='abid',
+            field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='snp_', unique=True),
+        ),
+        migrations.AddField(
+            model_name='snapshot',
+            name='uuid',
+            field=models.UUIDField(blank=True, null=True, unique=True),
+        ),
+        migrations.AddField(
+            model_name='tag',
+            name='abid',
+            field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='tag_', unique=True),
+        ),
+        migrations.AlterField(
+            model_name='archiveresult',
+            name='extractor',
+            field=models.CharField(choices=[('htmltotext', 'htmltotext'), ('git', 'git'), ('singlefile', 'singlefile'), ('media', 'media'), ('archive_org', 'archive_org'), ('readability', 'readability'), ('mercury', 'mercury'), ('favicon', 'favicon'), ('pdf', 'pdf'), ('headers', 'headers'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('title', 'title'), ('wget', 'wget')], max_length=32),
+        ),
+    ]
diff --git a/archivebox/core/migrations/0024_auto_20240513_1143.py b/archivebox/core/migrations/0024_auto_20240513_1143.py
new file mode 100644
index 00000000..31f1e773
--- /dev/null
+++ b/archivebox/core/migrations/0024_auto_20240513_1143.py
@@ -0,0 +1,114 @@
+# Generated by Django 5.0.6 on 2024-05-13 11:43
+
+from django.db import migrations
+from django.db.models import F
+from datetime import datetime, timezone
+from abid_utils.abid import abid_from_values
+
+
+def calculate_abid(self):
+    """
+    Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
+
+    `self` must carry abid_prefix plus the abid_ts_src, abid_uri_src,
+    abid_subtype_src and abid_rand_src expression strings; each expression is
+    evaluated with `self` in scope. Falsy results are swapped for a placeholder
+    and a warning is printed.
+
+    Raises Exception if abid_prefix is empty or 'obj_', and ValueError if the
+    computed ABID is missing its ulid, uuid or typeid.
+    """
+    # NOTE: eval() only ever sees the constant expressions assigned by the
+    # generate_*_abids() functions in this migration, never external input.
+    prefix = self.abid_prefix
+    ts = eval(self.abid_ts_src)
+    uri = eval(self.abid_uri_src)
+    subtype = eval(self.abid_subtype_src)
+    rand = eval(self.abid_rand_src)
+
+    if (not prefix) or prefix == 'obj_':
+        suggested_abid = self.__class__.__name__[:3].lower()
+        raise Exception(f'{self.__class__.__name__}.abid_prefix must be defined to calculate ABIDs (suggested: {suggested_abid})')
+
+    if not ts:
+        # Timezone-aware epoch; datetime.utcfromtimestamp() is deprecated since
+        # Python 3.12.
+        ts = datetime.fromtimestamp(0, tz=timezone.utc)
+        print(f'[!] WARNING: Generating ABID with ts=0000000000 placeholder because {self.__class__.__name__}.abid_ts_src={self.abid_ts_src} is unset!', ts.isoformat())
+
+    if not uri:
+        uri = str(self)
+        print(f'[!] WARNING: Generating ABID with uri=str(self) placeholder because {self.__class__.__name__}.abid_uri_src={self.abid_uri_src} is unset!', uri)
+
+    if not subtype:
+        subtype = self.__class__.__name__
+        print(f'[!] WARNING: Generating ABID with subtype={subtype} placeholder because {self.__class__.__name__}.abid_subtype_src={self.abid_subtype_src} is unset!', subtype)
+
+    if not rand:
+        rand = getattr(self, 'uuid', None) or getattr(self, 'id', None) or getattr(self, 'pk')
+        print(f'[!] WARNING: Generating ABID with rand=self.id placeholder because {self.__class__.__name__}.abid_rand_src={self.abid_rand_src} is unset!', rand)
+
+    abid = abid_from_values(
+        prefix=prefix,
+        ts=ts,
+        uri=uri,
+        subtype=subtype,
+        rand=rand,
+    )
+    # Explicit raise instead of assert so the check survives `python -O`.
+    if not (abid.ulid and abid.uuid and abid.typeid):
+        raise ValueError(f'Failed to calculate {prefix}_ABID for {self.__class__.__name__}')
+    return abid
+
+
+def copy_snapshot_uuids(apps, schema_editor):
+    """Backfill Snapshot.uuid with the value of each row's existing id."""
+    Snapshot = apps.get_model("core", "Snapshot")
+    # A single UPDATE replaces the per-row fetch-and-save loop.
+    Snapshot.objects.update(uuid=F("id"))
+
+
+def generate_snapshot_abids(apps, schema_editor):
+    """Compute and store an ABID for every existing Snapshot."""
+    Snapshot = apps.get_model("core", "Snapshot")
+    for snapshot in Snapshot.objects.all():
+        snapshot.abid_prefix = 'snp_'
+        snapshot.abid_ts_src = 'self.added'
+        snapshot.abid_uri_src = 'self.url'
+        snapshot.abid_subtype_src = '"01"'
+        snapshot.abid_rand_src = 'self.uuid'
+
+        snapshot.abid = calculate_abid(snapshot)
+        snapshot.save(update_fields=["abid"])
+
+
+def generate_archiveresult_abids(apps, schema_editor):
+    """Compute and store an ABID and matching uuid for every existing ArchiveResult."""
+    ArchiveResult = apps.get_model("core", "ArchiveResult")
+    # select_related() joins the parent Snapshot in the same query, avoiding
+    # one extra Snapshot lookup per result.
+    for result in ArchiveResult.objects.select_related("snapshot"):
+        result.abid_prefix = 'res_'
+        result.snapshot_added = result.snapshot.added
+        result.snapshot_url = result.snapshot.url
+        result.abid_ts_src = 'self.snapshot_added'
+        result.abid_uri_src = 'self.snapshot_url'
+        result.abid_subtype_src = 'self.extractor'
+        result.abid_rand_src = 'self.id'
+
+        result.abid = calculate_abid(result)
+        result.uuid = result.abid.uuid
+        result.save(update_fields=["abid", "uuid"])
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0023_alter_archiveresult_options_archiveresult_abid_and_more'),
+    ]
+
+    operations = [
+        migrations.RunPython(copy_snapshot_uuids, reverse_code=migrations.RunPython.noop),
+        migrations.RunPython(generate_snapshot_abids, reverse_code=migrations.RunPython.noop),
+        migrations.RunPython(generate_archiveresult_abids, reverse_code=migrations.RunPython.noop),
+    ]
diff --git a/archivebox/core/migrations/0025_alter_archiveresult_uuid.py b/archivebox/core/migrations/0025_alter_archiveresult_uuid.py
new file mode 100644
index 00000000..b60d424b
--- /dev/null
+++ b/archivebox/core/migrations/0025_alter_archiveresult_uuid.py
@@ -0,0 +1,19 @@
+# Generated by Django 5.0.6 on 2024-05-13 12:08
+
+import uuid
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0024_auto_20240513_1143'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='archiveresult',
+            name='uuid',
+            field=models.UUIDField(default=uuid.uuid4, editable=False, unique=True),
+        ),
+    ]