From 28e50c5e494bb763afa1a7e34b1ea40ef701cf4f Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 16 Feb 2021 15:51:56 -0500 Subject: [PATCH] tweak snapshot asset serving logic to show multiple choices in case of conflict --- archivebox/core/views.py | 123 ++++++++++++++++++++------------------- 1 file changed, 62 insertions(+), 61 deletions(-) diff --git a/archivebox/core/views.py b/archivebox/core/views.py index da420aab..539d029c 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -62,19 +62,21 @@ class SnapshotView(View): try: try: snapshot = Snapshot.objects.get(timestamp=slug) + response = static.serve(request, archivefile, document_root=snapshot.link_dir, show_indexes=True) + response["Link"] = f'<{snapshot.url}>; rel="canonical"' + return response except Snapshot.DoesNotExist: if Snapshot.objects.filter(timestamp__startswith=slug).exists(): raise Snapshot.MultipleObjectsReturned - response = static.serve(request, archivefile, document_root=snapshot.link_dir, show_indexes=True) - response["Link"] = f'<{snapshot.url}>; rel="canonical"' - return response + else: + raise except Snapshot.DoesNotExist: # Snapshot does not exist return HttpResponse( format_html( ( '



' - 'No Snapshots match the given timestamp: {}

' + 'No Snapshots match the given timestamp or UUID: {}

' 'You can add a new Snapshot, or return to the Main Index' '
' ), @@ -99,7 +101,7 @@ class SnapshotView(View): return HttpResponse( format_html( ( - 'Multiple Snapshots match the given timestamp {}
'
+                            'Multiple Snapshots match the given timestamp/UUID {}
'
                         ),
                         slug,
                     ) + snapshot_hrefs + format_html(
@@ -134,70 +136,69 @@ class SnapshotView(View):
                     status=404,
                 )
         # slug is a URL
-        else:
+        try:
             try:
+                # try exact match on full url first
+                snapshot = Snapshot.objects.get(
+                    Q(url='http://' + path) | Q(url='https://' + path)
+                )
+            except Snapshot.DoesNotExist:
+                # fall back to match on exact base_url
                 try:
-                    # try exact match on full url first
                     snapshot = Snapshot.objects.get(
-                        Q(url='http://' + path) | Q(url='https://' + path)
+                        Q(url='http://' + base_url(path)) | Q(url='https://' + base_url(path))
                     )
                 except Snapshot.DoesNotExist:
-                    # fall back to match on exact base_url
-                    try:
-                        snapshot = Snapshot.objects.get(
-                            Q(url='http://' + base_url(path)) | Q(url='https://' + base_url(path))
-                        )
-                    except Snapshot.DoesNotExist:
-                        # fall back to matching base_url as prefix
-                        snapshot = Snapshot.objects.get(
-                            Q(url__startswith='http://' + base_url(path)) | Q(url__startswith='https://' + base_url(path))
-                        )
-                return redirect(f'/archive/{snapshot.timestamp}/index.html')
-            except Snapshot.DoesNotExist:
-                return HttpResponse(
-                    format_html(
-                        (
-                            '



' - 'No Snapshots match the given url: {}

' - 'You can add a new Snapshot, or return to the Main Index' - '
' - ), - base_url(path), - path, - ), - content_type="text/html", - status=404, - ) - except Snapshot.MultipleObjectsReturned: - snapshot_hrefs = mark_safe('
').join( - format_html( - '{} {} {} {}', - snap.added.strftime('%Y-%m-%d %H:%M:%S'), - snap.timestamp, - snap.timestamp, - snap.url, - snap.title or '', - ) - for snap in Snapshot.objects.filter( + # fall back to matching base_url as prefix + snapshot = Snapshot.objects.get( Q(url__startswith='http://' + base_url(path)) | Q(url__startswith='https://' + base_url(path)) - ).only('url', 'timestamp', 'title', 'added').order_by('-added') - ) - return HttpResponse( - format_html( - ( - 'Multiple Snapshots match the given URL {}
'
-                        ),
-                        base_url(path),
-                    ) + snapshot_hrefs + format_html(
-                        (
-                            '

' - 'Choose a Snapshot to proceed or go back to the Main Index' - ) + ) + return redirect(f'/archive/{snapshot.timestamp}/index.html') + except Snapshot.DoesNotExist: + return HttpResponse( + format_html( + ( + '



' + 'No Snapshots match the given url: {}

' + 'You can add a new Snapshot, or return to the Main Index' + '
' ), - content_type="text/html", - status=404, + base_url(path), + path, + ), + content_type="text/html", + status=404, + ) + except Snapshot.MultipleObjectsReturned: + snapshot_hrefs = mark_safe('
').join( + format_html( + '{} {} {} {}', + snap.added.strftime('%Y-%m-%d %H:%M:%S'), + snap.timestamp, + snap.timestamp, + snap.url, + snap.title or '', ) - + for snap in Snapshot.objects.filter( + Q(url__startswith='http://' + base_url(path)) | Q(url__startswith='https://' + base_url(path)) + ).only('url', 'timestamp', 'title', 'added').order_by('-added') + ) + return HttpResponse( + format_html( + ( + 'Multiple Snapshots match the given URL {}
'
+                    ),
+                    base_url(path),
+                ) + snapshot_hrefs + format_html(
+                    (
+                        '

' + 'Choose a Snapshot to proceed or go back to the Main Index' + ) + ), + content_type="text/html", + status=404, + ) + class PublicIndexView(ListView): template_name = 'public_index.html'