1
0
Fork 0
mirror of synced 2024-06-28 11:00:35 +12:00

fix: Partially restore server command functionality (html still needs some refactoring)

This commit is contained in:
Cristian 2021-01-04 09:14:02 -05:00
parent e95f14d1d0
commit 15d88be229
3 changed files with 66 additions and 29 deletions

View file

@ -3,6 +3,7 @@ __package__ = 'archivebox.core'
import uuid import uuid
from pathlib import Path from pathlib import Path
from typing import Dict, Optional, List from typing import Dict, Optional, List
from datetime import datetime
from django.db import models, transaction from django.db import models, transaction
from django.utils.functional import cached_property from django.utils.functional import cached_property
@ -12,6 +13,7 @@ from django.db.models import Case, When, Value, IntegerField
from ..util import parse_date from ..util import parse_date
from ..index.schema import Link from ..index.schema import Link
from ..config import CONFIG from ..config import CONFIG
from ..system import get_dir_size
#EXTRACTORS = [(extractor[0], extractor[0]) for extractor in get_default_archive_methods()] #EXTRACTORS = [(extractor[0], extractor[0]) for extractor in get_default_archive_methods()]
EXTRACTORS = [("title", "title"), ("wget", "wget")] EXTRACTORS = [("title", "title"), ("wget", "wget")]
@ -133,8 +135,36 @@ class Snapshot(models.Model):
return parse_date(self.timestamp) return parse_date(self.timestamp)
@cached_property @cached_property
def is_archived(self): def is_archived(self) -> bool:
return self.as_link().is_archived from ..config import ARCHIVE_DIR
from ..util import domain
output_paths = (
domain(self.url),
'output.pdf',
'screenshot.png',
'output.html',
'media',
'singlefile.html'
)
return any(
(Path(ARCHIVE_DIR) / self.timestamp / path).exists()
for path in output_paths
)
@cached_property
def archive_dates(self) -> List[datetime]:
    """Collect the ``start_ts`` of every archive result attached to this snapshot.

    The results are read from ``self.archiveresult_set.all()`` and kept in
    the order that call yields them; no sorting is applied here.
    """
    start_times: List[datetime] = []
    for archive_result in self.archiveresult_set.all():
        start_times.append(archive_result.start_ts)
    return start_times
@cached_property
def oldest_archive_date(self) -> Optional[datetime]:
    """Return the earliest ``start_ts`` among this snapshot's archive results.

    Returns:
        The ``start_ts`` of the first result when ordered ascending by
        ``start_ts``, or ``None`` when the snapshot has no archive results.
    """
    # Ascending order puts the earliest result first. The previous
    # "-start_ts" ordering was descending, so it returned the *newest* date.
    earliest = self.archiveresult_set.all().order_by("start_ts").first()
    if earliest is None:
        return None
    return earliest.start_ts
@cached_property @cached_property
def num_outputs(self): def num_outputs(self):
@ -145,8 +175,9 @@ class Snapshot(models.Model):
return self.as_link().url_hash return self.as_link().url_hash
@cached_property @cached_property
def base_url(self): def base_url(self) -> str:
return self.as_link().base_url from ..util import base_url
return base_url(self.url)
@cached_property @cached_property
def snapshot_dir(self): def snapshot_dir(self):
@ -155,11 +186,15 @@ class Snapshot(models.Model):
@cached_property @cached_property
def archive_path(self): def archive_path(self):
return self.as_link().archive_path from ..config import ARCHIVE_DIR_NAME
return '{}/{}'.format(ARCHIVE_DIR_NAME, self.timestamp)
@cached_property @cached_property
def archive_size(self): def archive_size(self) -> float:
return self.as_link().archive_size try:
return get_dir_size(self.archive_path)[0]
except Exception:
return 0
@cached_property @cached_property
def history(self): def history(self):
@ -191,7 +226,10 @@ class Snapshot(models.Model):
# TODO: Define what details are, and return them accordingly # TODO: Define what details are, and return them accordingly
return {"history": {}} return {"history": {}}
@property
def extension(self) -> str:
    """Return the result of ``..util.extension`` applied to this snapshot's URL."""
    # Imported locally, as in the original; aliased so the helper is not
    # confused with this property's own name.
    from ..util import extension as url_extension

    file_extension = url_extension(self.url)
    return file_extension
def canonical_outputs(self) -> Dict[str, Optional[str]]: def canonical_outputs(self) -> Dict[str, Optional[str]]:
"""predict the expected output paths that should be present after archiving""" """predict the expected output paths that should be present after archiving"""

View file

@ -61,7 +61,7 @@ class LinkDetails(View):
by_ts = {page.timestamp: page for page in all_pages} by_ts = {page.timestamp: page for page in all_pages}
try: try:
# print('SERVING STATICFILE', by_ts[slug].link_dir, request.path, path) # print('SERVING STATICFILE', by_ts[slug].link_dir, request.path, path)
response = static.serve(request, archivefile, document_root=by_ts[slug].link_dir, show_indexes=True) response = static.serve(request, archivefile, document_root=by_ts[slug].snapshot_dir, show_indexes=True)
response["Link"] = f'<{by_ts[slug].url}>; rel="canonical"' response["Link"] = f'<{by_ts[slug].url}>; rel="canonical"'
return response return response
except KeyError: except KeyError:

View file

@ -61,10 +61,10 @@ def main_index_template(snapshots: List[Model], template: str=MAIN_INDEX_TEMPLAT
return render_django_template(template, { return render_django_template(template, {
'version': VERSION, 'version': VERSION,
'git_sha': GIT_SHA, 'git_sha': GIT_SHA,
'num_links': str(len(snapshots)), 'num_snapshots': str(len(snapshots)),
'date_updated': datetime.now().strftime('%Y-%m-%d'), 'date_updated': datetime.now().strftime('%Y-%m-%d'),
'time_updated': datetime.now().strftime('%Y-%m-%d %H:%M'), 'time_updated': datetime.now().strftime('%Y-%m-%d %H:%M'),
'links': [snapshot.as_json() for snapshot in snapshots], 'snapshots': snapshots,
'FOOTER_INFO': FOOTER_INFO, 'FOOTER_INFO': FOOTER_INFO,
}) })
@ -80,30 +80,30 @@ def write_html_snapshot_details(snapshot: Model, out_dir: Optional[str]=None) ->
@enforce_types @enforce_types
def link_details_template(link: Link) -> str: def link_details_template(snapshot: Model) -> str:
from ..extractors.wget import wget_output_path from ..extractors.wget import wget_output_path
link_info = link._asdict(extended=True) snapshot._asdict()
return render_django_template(LINK_DETAILS_TEMPLATE, { return render_django_template(LINK_DETAILS_TEMPLATE, {
**link_info, **snapshot._asdict(),
**link_info['canonical'], **snapshot.canonical_outputs(),
'title': htmlencode( 'title': htmlencode(
link.title snapshot.title
or (link.base_url if link.is_archived else TITLE_LOADING_MSG) or (snapshot.base_url if snapshot.is_archived else TITLE_LOADING_MSG)
), ),
'url_str': htmlencode(urldecode(link.base_url)), 'url_str': htmlencode(urldecode(snapshot.base_url)),
'archive_url': urlencode( 'archive_url': urlencode(
wget_output_path(link) wget_output_path(snapshot)
or (link.domain if link.is_archived else '') or (snapshot.domain if snapshot.is_archived else '')
) or 'about:blank', ) or 'about:blank',
'extension': link.extension or 'html', 'extension': snapshot.extension or 'html',
'tags': link.tags or 'untagged', 'tags': snapshot.tags.all() or 'untagged', #TODO: Return a proper comma separated list. Leaving it like this for now to revisit when fixing tags
'size': printable_filesize(link.archive_size) if link.archive_size else 'pending', 'size': printable_filesize(snapshot.archive_size) if snapshot.archive_size else 'pending',
'status': 'archived' if link.is_archived else 'not yet archived', 'status': 'archived' if snapshot.is_archived else 'not yet archived',
'status_color': 'success' if link.is_archived else 'danger', 'status_color': 'success' if snapshot.is_archived else 'danger',
'oldest_archive_date': ts_to_date(link.oldest_archive_date), 'oldest_archive_date': ts_to_date(snapshot.oldest_archive_date),
}) })
@enforce_types @enforce_types
@ -118,9 +118,8 @@ def snapshot_icons(snapshot) -> str:
from core.models import EXTRACTORS from core.models import EXTRACTORS
archive_results = snapshot.archiveresult_set.filter(status="succeeded") archive_results = snapshot.archiveresult_set.filter(status="succeeded")
link = snapshot.as_link() path = snapshot.archive_path
path = link.archive_path canon = snapshot.canonical_outputs()
canon = link.canonical_outputs()
output = "" output = ""
output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{} </a>' output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{} </a>'
icons = { icons = {