fix: Partially restore server command functionality (html still needs some refactoring)

2024-06-28 11:00:35 +12:00 · 2021-01-04 09:14:02 -05:00 · 2021-01-04 09:14:02 -05:00 · 15d88be229
parent e95f14d1d0
commit 15d88be229
3 changed files with 66 additions and 29 deletions
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@ -3,6 +3,7 @@ __package__ = 'archivebox.core'
 import uuid
 from pathlib import Path
 from typing import Dict, Optional, List
 from datetime import datetime
 from django.db import models, transaction
 from django.utils.functional import cached_property
@ -12,6 +13,7 @@ from django.db.models import Case, When, Value, IntegerField
 from ..util import parse_date
 from ..index.schema import Link
 from ..config import CONFIG
 from ..system import get_dir_size
 #EXTRACTORS = [(extractor[0], extractor[0]) for extractor in get_default_archive_methods()]
 EXTRACTORS = [("title", "title"), ("wget", "wget")]
@ -133,8 +135,36 @@ class Snapshot(models.Model):
        return parse_date(self.timestamp)
    @cached_property
-    def is_archived(self):
+    def is_archived(self) -> bool:
-        return self.as_link().is_archived
+        from ..config import ARCHIVE_DIR
        from ..util import domain
        output_paths = (
            domain(self.url),
            'output.pdf',
            'screenshot.png',
            'output.html',
            'media',
            'singlefile.html'
        )
        return any(
            (Path(ARCHIVE_DIR) / self.timestamp / path).exists()
            for path in output_paths
        )
    @cached_property
    def archive_dates(self) -> List[datetime]:
        """Collect the ``start_ts`` of every archive result attached to this snapshot."""
        results = self.archiveresult_set.all()
        return [entry.start_ts for entry in results]
    @cached_property
    def oldest_archive_date(self) -> Optional[datetime]:
        """Return the earliest ``start_ts`` among this snapshot's archive results.

        Returns ``None`` when the snapshot has no archive results.
        """
        # Ascending order so the first row is the earliest run; a "-start_ts"
        # ordering would put the most recent result first instead.
        oldest = self.archiveresult_set.all().order_by("start_ts").first()
        if oldest is None:
            return None
        return oldest.start_ts
    @cached_property
    def num_outputs(self):
@ -145,8 +175,9 @@ class Snapshot(models.Model):
        return self.as_link().url_hash
    @cached_property
-    def base_url(self):
+    def base_url(self) -> str:
-        return self.as_link().base_url
+        from ..util import base_url
        return base_url(self.url)
    @cached_property
    def snapshot_dir(self):
@ -155,11 +186,15 @@ class Snapshot(models.Model):
    @cached_property
    def archive_path(self):
-        return self.as_link().archive_path
+        from ..config import ARCHIVE_DIR_NAME
        return '{}/{}'.format(ARCHIVE_DIR_NAME, self.timestamp)
    @cached_property
-    def archive_size(self):
+    def archive_size(self) -> float:
-        return self.as_link().archive_size
+        try:
            return get_dir_size(self.archive_path)[0]
        except Exception:
            return 0
    @cached_property
    def history(self):
@ -191,7 +226,10 @@ class Snapshot(models.Model):
        # TODO: Define what details are, and return them accordingly
        return {"history": {}}
-
+    @property
    def extension(self) -> str:
        from ..util import extension
        return extension(self.url)
    def canonical_outputs(self) -> Dict[str, Optional[str]]:
        """predict the expected output paths that should be present after archiving"""
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@ -61,7 +61,7 @@ class LinkDetails(View):
        by_ts = {page.timestamp: page for page in all_pages}
        try:
            # print('SERVING STATICFILE', by_ts[slug].link_dir, request.path, path)
-            response = static.serve(request, archivefile, document_root=by_ts[slug].link_dir, show_indexes=True)
+            response = static.serve(request, archivefile, document_root=by_ts[slug].snapshot_dir, show_indexes=True)
            response["Link"] = f'<{by_ts[slug].url}>; rel="canonical"'
            return response
        except KeyError:
--- a/archivebox/index/html.py
+++ b/archivebox/index/html.py
@ -61,10 +61,10 @@ def main_index_template(snapshots: List[Model], template: str=MAIN_INDEX_TEMPLAT
    return render_django_template(template, {
        'version': VERSION,
        'git_sha': GIT_SHA,
-        'num_links': str(len(snapshots)),
+        'num_snapshots': str(len(snapshots)),
        'date_updated': datetime.now().strftime('%Y-%m-%d'),
        'time_updated': datetime.now().strftime('%Y-%m-%d %H:%M'),
-        'links': [snapshot.as_json() for snapshot in snapshots],
+        'snapshots': snapshots,
        'FOOTER_INFO': FOOTER_INFO,
    })
@ -80,30 +80,30 @@ def write_html_snapshot_details(snapshot: Model, out_dir: Optional[str]=None) ->
@enforce_types
-def link_details_template(link: Link) -> str:
+def link_details_template(snapshot: Model) -> str:
    from ..extractors.wget import wget_output_path
-    link_info = link._asdict(extended=True)
+    snapshot._asdict()
    return render_django_template(LINK_DETAILS_TEMPLATE, {
-        **link_info,
+        **snapshot._asdict(),
-        **link_info['canonical'],
+        **snapshot.canonical_outputs(),
        'title': htmlencode(
-            link.title
+            snapshot.title
-            or (link.base_url if link.is_archived else TITLE_LOADING_MSG)
+            or (snapshot.base_url if snapshot.is_archived else TITLE_LOADING_MSG)
        ),
-        'url_str': htmlencode(urldecode(link.base_url)),
+        'url_str': htmlencode(urldecode(snapshot.base_url)),
        'archive_url': urlencode(
-            wget_output_path(link)
+            wget_output_path(snapshot)
-            or (link.domain if link.is_archived else '')
+            or (snapshot.domain if snapshot.is_archived else '')
        ) or 'about:blank',
-        'extension': link.extension or 'html',
+        'extension': snapshot.extension or 'html',
-        'tags': link.tags or 'untagged',
+        'tags': snapshot.tags.all() or 'untagged', #TODO: Return a proper comma separated list. Leaving it like this for now to revisit when fixing tags
-        'size': printable_filesize(link.archive_size) if link.archive_size else 'pending',
+        'size': printable_filesize(snapshot.archive_size) if snapshot.archive_size else 'pending',
-        'status': 'archived' if link.is_archived else 'not yet archived',
+        'status': 'archived' if snapshot.is_archived else 'not yet archived',
-        'status_color': 'success' if link.is_archived else 'danger',
+        'status_color': 'success' if snapshot.is_archived else 'danger',
-        'oldest_archive_date': ts_to_date(link.oldest_archive_date),
+        'oldest_archive_date': ts_to_date(snapshot.oldest_archive_date),
    })
@enforce_types
@ -118,9 +118,8 @@ def snapshot_icons(snapshot) -> str:
    from core.models import EXTRACTORS
    archive_results = snapshot.archiveresult_set.filter(status="succeeded")
-    link = snapshot.as_link()
+    path = snapshot.archive_path
-    path = link.archive_path
+    canon = snapshot.canonical_outputs()
    canon = link.canonical_outputs()
    output = ""
    output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{} </a>'
    icons = {