diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py
index b15507a4..5d3db409 100644
--- a/archivebox/core/admin.py
+++ b/archivebox/core/admin.py
@@ -13,8 +13,8 @@ from django import forms
from core.models import Snapshot, Tag
from core.forms import AddLinkForm, TagField
-from core.utils import get_icons
+from index.html import snapshot_icons
from util import htmldecode, urldecode, ansi_to_html
from logging_util import printable_filesize
from main import add, remove
@@ -128,7 +128,7 @@ class SnapshotAdmin(admin.ModelAdmin):
) + mark_safe(f' {tags}')
def files(self, obj):
- return get_icons(obj)
+ return snapshot_icons(obj)
def size(self, obj):
archive_size = obj.archive_size
diff --git a/archivebox/core/utils.py b/archivebox/core/utils.py
deleted file mode 100644
index 39dca220..00000000
--- a/archivebox/core/utils.py
+++ /dev/null
@@ -1,54 +0,0 @@
-from django.utils.html import format_html
-from collections import defaultdict
-
-from core.models import Snapshot, EXTRACTORS
-from pathlib import Path
-
-
-def get_icons(snapshot: Snapshot) -> str:
- archive_results = snapshot.archiveresult_set.filter(status="succeeded")
- link = snapshot.as_link()
- path = link.archive_path
- canon = link.canonical_outputs()
- output = ""
- output_template = '{} '
- icons = {
- "singlefile": "❶",
- "wget": "🆆",
- "dom": "🅷",
- "pdf": "📄",
- "screenshot": "💻",
- "media": "📼",
- "git": "🅶",
- "archive_org": "🏛",
- "readability": "🆁",
- "mercury": "🅼",
- "warc": "📦"
- }
- exclude = ["favicon", "title", "headers", "archive_org"]
- # Missing specific entry for WARC
-
- extractor_items = defaultdict(lambda: None)
- for extractor, _ in EXTRACTORS:
- for result in archive_results:
- if result.extractor == extractor:
- extractor_items[extractor] = result
-
- for extractor, _ in EXTRACTORS:
- if extractor not in exclude:
- exists = extractor_items[extractor] is not None
- output += output_template.format(path, canon[f"{extractor}_path"], str(exists),
- extractor, icons.get(extractor, "?"))
- if extractor == "wget":
- # warc isn't technically it's own extractor, so we have to add it after wget
- exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
- output += output_template.format(exists[0] if exists else '#', canon["warc_path"], str(bool(exists)), "warc", icons.get("warc", "?"))
-
- if extractor == "archive_org":
- # The check for archive_org is different, so it has to be handled separately
- target_path = Path(path) / "archive.org.txt"
- exists = target_path.exists()
- output += '{} '.format(canon["archive_org_path"], str(exists),
- "archive_org", icons.get("archive_org", "?"))
-
- return format_html(f'{output}')
diff --git a/archivebox/core/views.py b/archivebox/core/views.py
index e8b20aec..aaef74e2 100644
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -12,7 +12,6 @@ from django.views.generic import FormView
from django.contrib.auth.mixins import UserPassesTestMixin
from core.models import Snapshot
-from core.utils import get_icons
from core.forms import AddLinkForm
from ..config import (
@@ -25,6 +24,7 @@ from ..config import (
)
from main import add
from ..util import base_url, ansi_to_html
+from ..index.html import snapshot_icons
class MainIndex(View):
@@ -108,7 +108,7 @@ class PublicArchiveView(ListView):
if query:
qs = Snapshot.objects.filter(title__icontains=query)
for snapshot in qs:
- snapshot.icons = get_icons(snapshot)
+ snapshot.icons = snapshot_icons(snapshot)
return qs
def get(self, *args, **kwargs):
diff --git a/archivebox/index/html.py b/archivebox/index/html.py
index 8b37c142..c107bb3b 100644
--- a/archivebox/index/html.py
+++ b/archivebox/index/html.py
@@ -5,8 +5,13 @@ from datetime import datetime
from typing import List, Optional, Iterator, Mapping
from pathlib import Path
+from django.utils.html import format_html
+from collections import defaultdict
+
+from pathlib import Path
+
from .schema import Link
-from ..system import atomic_write, copy_and_overwrite
+from ..system import atomic_write
from ..logging_util import printable_filesize
from ..util import (
enforce_types,
@@ -23,9 +28,6 @@ from ..config import (
FOOTER_INFO,
ARCHIVE_DIR_NAME,
HTML_INDEX_FILENAME,
- STATIC_DIR_NAME,
- ROBOTS_TXT_FILENAME,
- FAVICON_FILENAME,
)
MAIN_INDEX_TEMPLATE = str(Path(TEMPLATES_DIR) / 'main_index.html')
@@ -143,3 +145,56 @@ def render_legacy_template(template_path: str, context: Mapping[str, str]) -> st
with open(template_path, 'r', encoding='utf-8') as template:
template_str = template.read()
return Template(template_str).substitute(**context)
+
+
+
+
+def snapshot_icons(snapshot) -> str:
+ from core.models import Snapshot, EXTRACTORS
+
+ archive_results = snapshot.archiveresult_set.filter(status="succeeded")
+ link = snapshot.as_link()
+ path = link.archive_path
+ canon = link.canonical_outputs()
+ output = ""
+ output_template = '{} '
+ icons = {
+ "singlefile": "❶",
+ "wget": "🆆",
+ "dom": "🅷",
+ "pdf": "📄",
+ "screenshot": "💻",
+ "media": "📼",
+ "git": "🅶",
+ "archive_org": "🏛",
+ "readability": "🆁",
+ "mercury": "🅼",
+ "warc": "📦"
+ }
+ exclude = ["favicon", "title", "headers", "archive_org"]
+ # Missing specific entry for WARC
+
+ extractor_items = defaultdict(lambda: None)
+ for extractor, _ in EXTRACTORS:
+ for result in archive_results:
+ if result.extractor == extractor:
+ extractor_items[extractor] = result
+
+ for extractor, _ in EXTRACTORS:
+ if extractor not in exclude:
+ exists = extractor_items[extractor] is not None
+ output += output_template.format(path, canon[f"{extractor}_path"], str(exists),
+ extractor, icons.get(extractor, "?"))
+ if extractor == "wget":
+ # warc isn't technically its own extractor, so we have to add it after wget
+ exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
+ output += output_template.format(exists[0] if exists else '#', canon["warc_path"], str(bool(exists)), "warc", icons.get("warc", "?"))
+
+ if extractor == "archive_org":
+ # The check for archive_org is different, so it has to be handled separately
+ target_path = Path(path) / "archive.org.txt"
+ exists = target_path.exists()
+ output += '{} '.format(canon["archive_org_path"], str(exists),
+ "archive_org", icons.get("archive_org", "?"))
+
+ return format_html(f'{output}')
diff --git a/archivebox/util.py b/archivebox/util.py
index 4e55e30d..733fe8f5 100644
--- a/archivebox/util.py
+++ b/archivebox/util.py
@@ -246,6 +246,7 @@ def chrome_args(**options) -> List[str]:
return cmd_args
+
def ansi_to_html(text):
"""
Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html