From 104553489f9ab626d9299ec511552f2dfd16c635 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sat, 28 Nov 2020 02:12:27 -0500 Subject: [PATCH] remove redundant utils file --- archivebox/core/admin.py | 4 +-- archivebox/core/utils.py | 54 ---------------------------------- archivebox/core/views.py | 4 +-- archivebox/index/html.py | 63 +++++++++++++++++++++++++++++++++++++--- archivebox/util.py | 1 + 5 files changed, 64 insertions(+), 62 deletions(-) delete mode 100644 archivebox/core/utils.py diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py index b15507a4..5d3db409 100644 --- a/archivebox/core/admin.py +++ b/archivebox/core/admin.py @@ -13,8 +13,8 @@ from django import forms from core.models import Snapshot, Tag from core.forms import AddLinkForm, TagField -from core.utils import get_icons +from index.html import snapshot_icons from util import htmldecode, urldecode, ansi_to_html from logging_util import printable_filesize from main import add, remove @@ -128,7 +128,7 @@ class SnapshotAdmin(admin.ModelAdmin): ) + mark_safe(f' {tags}') def files(self, obj): - return get_icons(obj) + return snapshot_icons(obj) def size(self, obj): archive_size = obj.archive_size diff --git a/archivebox/core/utils.py b/archivebox/core/utils.py deleted file mode 100644 index 39dca220..00000000 --- a/archivebox/core/utils.py +++ /dev/null @@ -1,54 +0,0 @@ -from django.utils.html import format_html -from collections import defaultdict - -from core.models import Snapshot, EXTRACTORS -from pathlib import Path - - -def get_icons(snapshot: Snapshot) -> str: - archive_results = snapshot.archiveresult_set.filter(status="succeeded") - link = snapshot.as_link() - path = link.archive_path - canon = link.canonical_outputs() - output = "" - output_template = '{} ' - icons = { - "singlefile": "❶", - "wget": "🆆", - "dom": "🅷", - "pdf": "📄", - "screenshot": "💻", - "media": "📼", - "git": "🅶", - "archive_org": "🏛", - "readability": "🆁", - "mercury": "🅼", - "warc": "📦" - } - exclude = ["favicon", "title", "headers", "archive_org"] - # Missing specific entry for WARC - - extractor_items = defaultdict(lambda: None) - for extractor, _ in EXTRACTORS: - for result in archive_results: - if result.extractor == extractor: - extractor_items[extractor] = result - - for extractor, _ in EXTRACTORS: - if extractor not in exclude: - exists = extractor_items[extractor] is not None - output += output_template.format(path, canon[f"{extractor}_path"], str(exists), - extractor, icons.get(extractor, "?")) - if extractor == "wget": - # warc isn't technically it's own extractor, so we have to add it after wget - exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz")) - output += output_template.format(exists[0] if exists else '#', canon["warc_path"], str(bool(exists)), "warc", icons.get("warc", "?")) - - if extractor == "archive_org": - # The check for archive_org is different, so it has to be handled separately - target_path = Path(path) / "archive.org.txt" - exists = target_path.exists() - output += '{} '.format(canon["archive_org_path"], str(exists), - "archive_org", icons.get("archive_org", "?")) - - return format_html(f'{output}') diff --git a/archivebox/core/views.py b/archivebox/core/views.py index e8b20aec..aaef74e2 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -12,7 +12,6 @@ from django.views.generic import FormView from django.contrib.auth.mixins import UserPassesTestMixin from core.models import Snapshot -from core.utils import get_icons from core.forms import AddLinkForm from ..config import ( @@ -25,6 +24,7 @@ from ..config import ( ) from main import add from ..util import base_url, ansi_to_html +from ..index.html import snapshot_icons class MainIndex(View): @@ -108,7 +108,7 @@ class PublicArchiveView(ListView): if query: qs = Snapshot.objects.filter(title__icontains=query) for snapshot in qs: - snapshot.icons = get_icons(snapshot) + snapshot.icons = snapshot_icons(snapshot) return qs def get(self, *args, **kwargs): diff --git a/archivebox/index/html.py b/archivebox/index/html.py index 8b37c142..c107bb3b 100644 --- a/archivebox/index/html.py +++ b/archivebox/index/html.py @@ -5,8 +5,13 @@ from datetime import datetime from typing import List, Optional, Iterator, Mapping from pathlib import Path +from django.utils.html import format_html +from collections import defaultdict + +from pathlib import Path + from .schema import Link -from ..system import atomic_write, copy_and_overwrite +from ..system import atomic_write from ..logging_util import printable_filesize from ..util import ( enforce_types, @@ -23,9 +28,6 @@ from ..config import ( FOOTER_INFO, ARCHIVE_DIR_NAME, HTML_INDEX_FILENAME, - STATIC_DIR_NAME, - ROBOTS_TXT_FILENAME, - FAVICON_FILENAME, ) MAIN_INDEX_TEMPLATE = str(Path(TEMPLATES_DIR) / 'main_index.html') @@ -143,3 +145,56 @@ def render_legacy_template(template_path: str, context: Mapping[str, str]) -> st with open(template_path, 'r', encoding='utf-8') as template: template_str = template.read() return Template(template_str).substitute(**context) + + + + +def snapshot_icons(snapshot) -> str: + from core.models import Snapshot, EXTRACTORS + + archive_results = snapshot.archiveresult_set.filter(status="succeeded") + link = snapshot.as_link() + path = link.archive_path + canon = link.canonical_outputs() + output = "" + output_template = '{} ' + icons = { + "singlefile": "❶", + "wget": "🆆", + "dom": "🅷", + "pdf": "📄", + "screenshot": "💻", + "media": "📼", + "git": "🅶", + "archive_org": "🏛", + "readability": "🆁", + "mercury": "🅼", + "warc": "📦" + } + exclude = ["favicon", "title", "headers", "archive_org"] + # Missing specific entry for WARC + + extractor_items = defaultdict(lambda: None) + for extractor, _ in EXTRACTORS: + for result in archive_results: + if result.extractor == extractor: + extractor_items[extractor] = result + + for extractor, _ in EXTRACTORS: + if extractor not in exclude: + exists = extractor_items[extractor] is not None + output += output_template.format(path, canon[f"{extractor}_path"], str(exists), + extractor, icons.get(extractor, "?")) + if extractor == "wget": + # warc isn't technically it's own extractor, so we have to add it after wget + exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz")) + output += output_template.format(exists[0] if exists else '#', canon["warc_path"], str(bool(exists)), "warc", icons.get("warc", "?")) + + if extractor == "archive_org": + # The check for archive_org is different, so it has to be handled separately + target_path = Path(path) / "archive.org.txt" + exists = target_path.exists() + output += '{} '.format(canon["archive_org_path"], str(exists), + "archive_org", icons.get("archive_org", "?")) + + return format_html(f'{output}') diff --git a/archivebox/util.py b/archivebox/util.py index 4e55e30d..733fe8f5 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -246,6 +246,7 @@ def chrome_args(**options) -> List[str]: return cmd_args + def ansi_to_html(text): """ Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html