diff --git a/archivebox/cli/logging.py b/archivebox/cli/logging.py index e07b0719..078b3a09 100644 --- a/archivebox/cli/logging.py +++ b/archivebox/cli/logging.py @@ -218,6 +218,7 @@ def log_archiving_paused(num_links: int, idx: int, timestamp: str): timestamp=timestamp, total=num_links, )) + print() print(' To view your archive, open:') print(' {}/index.html'.format(OUTPUT_DIR)) print(' Continue archiving where you left off by running:') @@ -244,8 +245,11 @@ def log_archiving_finished(num_links: int): print(' - {} links skipped'.format(_LAST_RUN_STATS.skipped)) print(' - {} links updated'.format(_LAST_RUN_STATS.succeeded)) print(' - {} links had errors'.format(_LAST_RUN_STATS.failed)) + print() print(' To view your archive, open:') print(' {}/index.html'.format(OUTPUT_DIR)) + print(' Or run the built-in webserver:') + print(' archivebox server') def log_link_archiving_started(link: Link, link_dir: str, is_new: bool): @@ -378,15 +382,15 @@ def log_shell_welcome_msg(): from . import list_subcommands print('{green}# ArchiveBox Imports{reset}'.format(**ANSI)) - print('{green}from archivebox.core.models import Page, User{reset}'.format(**ANSI)) + print('{green}from archivebox.core.models import Snapshot, User{reset}'.format(**ANSI)) print('{green}from archivebox import *\n {}{reset}'.format("\n ".join(list_subcommands().keys()), **ANSI)) print() print('[i] Welcome to the ArchiveBox Shell!') print(' https://github.com/pirate/ArchiveBox/wiki/Usage#Shell-Usage') print() print(' {lightred}Hint:{reset} Example use:'.format(**ANSI)) - print(' print(Page.objects.filter(is_archived=True).count())') - print(' Page.objects.get(url="https://example.com").as_json()') + print(' print(Snapshot.objects.filter(is_archived=True).count())') + print(' Snapshot.objects.get(url="https://example.com").as_json()') print(' add("https://example.com/some/new/url")') diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py index 23fe3286..526d0602 100644 --- a/archivebox/core/admin.py 
+++ b/archivebox/core/admin.py @@ -1,9 +1,9 @@ from django.contrib import admin -from core.models import Page +from core.models import Snapshot -class PageAdmin(admin.ModelAdmin): +class SnapshotAdmin(admin.ModelAdmin): list_display = ('timestamp', 'short_url', 'title', 'is_archived', 'num_outputs', 'added', 'updated', 'url_hash') readonly_fields = ('num_outputs', 'is_archived', 'added', 'updated', 'bookmarked') fields = ('url', 'timestamp', 'title', 'tags', *readonly_fields) @@ -14,4 +14,4 @@ class PageAdmin(admin.ModelAdmin): def updated(self, obj): return obj.isoformat() -admin.site.register(Page, PageAdmin) +admin.site.register(Snapshot, SnapshotAdmin) diff --git a/archivebox/core/migrations/0001_initial.py b/archivebox/core/migrations/0001_initial.py index 366db56c..73ac78e7 100644 --- a/archivebox/core/migrations/0001_initial.py +++ b/archivebox/core/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 2.2 on 2019-04-17 06:46 +# Generated by Django 2.2 on 2019-05-01 03:27 from django.db import migrations, models import uuid @@ -13,15 +13,14 @@ class Migration(migrations.Migration): operations = [ migrations.CreateModel( - name='Page', + name='Snapshot', fields=[ ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)), - ('url', models.URLField()), - ('timestamp', models.CharField(default=None, max_length=32, null=True)), + ('url', models.URLField(unique=True)), + ('timestamp', models.CharField(default=None, max_length=32, null=True, unique=True)), ('title', models.CharField(default=None, max_length=128, null=True)), ('tags', models.CharField(default=None, max_length=256, null=True)), ('added', models.DateTimeField(auto_now_add=True)), - ('bookmarked', models.DateTimeField()), ('updated', models.DateTimeField(default=None, null=True)), ], ), diff --git a/archivebox/core/migrations/0002_auto_20190417_0739.py b/archivebox/core/migrations/0002_auto_20190417_0739.py deleted file mode 100644 index 
a265c13d..00000000 --- a/archivebox/core/migrations/0002_auto_20190417_0739.py +++ /dev/null @@ -1,27 +0,0 @@ -# Generated by Django 2.2 on 2019-04-17 07:39 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('core', '0001_initial'), - ] - - operations = [ - migrations.RemoveField( - model_name='page', - name='bookmarked', - ), - migrations.AlterField( - model_name='page', - name='timestamp', - field=models.CharField(default=None, max_length=32, null=True, unique=True), - ), - migrations.AlterField( - model_name='page', - name='url', - field=models.URLField(unique=True), - ), - ] diff --git a/archivebox/core/models.py b/archivebox/core/models.py index 2900f798..2c889585 100644 --- a/archivebox/core/models.py +++ b/archivebox/core/models.py @@ -8,7 +8,7 @@ from ..util import parse_date from ..index.schema import Link -class Page(models.Model): +class Snapshot(models.Model): id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) url = models.URLField(unique=True) diff --git a/archivebox/core/views.py b/archivebox/core/views.py index 9edb9557..2c140d58 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -5,7 +5,7 @@ from django.shortcuts import render, redirect from django.http import HttpResponse from django.views import View, static -from core.models import Page +from core.models import Snapshot from ..index import load_main_index, load_main_index_meta from ..config import OUTPUT_DIR, VERSION, FOOTER_INFO @@ -59,7 +59,7 @@ class LinkDetails(View): except (IndexError, ValueError): slug, archivefile = path.split('/', 1)[0], 'index.html' - all_pages = list(Page.objects.all()) + all_pages = list(Snapshot.objects.all()) # slug is a timestamp by_ts = {page.timestamp: page for page in all_pages} diff --git a/archivebox/index/sql.py b/archivebox/index/sql.py index 942054c2..f861adaf 100644 --- a/archivebox/index/sql.py +++ b/archivebox/index/sql.py @@ -13,30 +13,38 @@ from 
..config import setup_django, OUTPUT_DIR @enforce_types def parse_sql_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]: setup_django(out_dir, check_db=True) - from core.models import Page + from core.models import Snapshot return ( - Link.from_json(page.as_json(*Page.keys)) - for page in Page.objects.all() + Link.from_json(page.as_json(*Snapshot.keys)) + for page in Snapshot.objects.all() ) @enforce_types def write_sql_main_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None: setup_django(out_dir, check_db=True) - from core.models import Page + from core.models import Snapshot + from django.db import transaction all_urls = {link.url: link for link in links} + all_ts = {link.timestamp: link for link in links} - for page in Page.objects.all(): - if page.url in all_urls: - info = {k: v for k, v in all_urls.pop(page.url)._asdict().items() if k in Page.keys} - Page.objects.update(**info) - else: - page.delete() + with transaction.atomic(): + for snapshot in Snapshot.objects.all(): + if snapshot.timestamp in all_ts: + info = {k: v for k, v in all_urls.pop(snapshot.url, all_ts[snapshot.timestamp])._asdict().items() if k in Snapshot.keys} + snapshot.delete() + Snapshot.objects.create(**info) + elif snapshot.url in all_urls: + info = {k: v for k, v in all_urls.pop(snapshot.url)._asdict().items() if k in Snapshot.keys} + snapshot.delete() + Snapshot.objects.create(**info) + else: + snapshot.delete() - for url, link in all_urls.items(): - info = {k: v for k, v in link._asdict().items() if k in Page.keys} - Page.objects.update_or_create(url=url, defaults=info) + for url, link in all_urls.items(): + info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys} + Snapshot.objects.update_or_create(url=url, defaults=info)