diff --git a/archivebox/core/utils.py b/archivebox/core/utils.py index 3c310525..9804d6ee 100644 --- a/archivebox/core/utils.py +++ b/archivebox/core/utils.py @@ -1,4 +1,5 @@ from django.utils.html import format_html +from collections import defaultdict from core.models import Snapshot, EXTRACTORS from pathlib import Path @@ -7,9 +8,10 @@ from pathlib import Path def get_icons(snapshot: Snapshot) -> str: archive_results = snapshot.archiveresult_set.filter(status="succeeded") link = snapshot.as_link() + path = link.archive_path canon = link.canonical_outputs() output = "" - output_template = '{} ' + output_template = '{} ' icons = { "singlefile": "❶", "wget": "🆆", @@ -23,34 +25,31 @@ def get_icons(snapshot: Snapshot) -> str: "mercury": "🅼", "warc": "📦" } - exclude = ["favicon", "archive_org"] + exclude = ["favicon", "title", "headers", "archive_org"] # Missing specific entry for WARC + extractor_items = defaultdict(lambda: None) for extractor, _ in EXTRACTORS: for result in archive_results: - if result.extractor != extractor: - continue - path = link.archive_path - try: - if extractor not in exclude: - output += output_template.format(path, canon[f"{extractor}_path"], - extractor, icons.get(extractor, "?")) - if extractor == "wget": - # warc isn't technically it's own extractor, so we have to add it after wget - exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz")) - if exists: - output += output_template.format(exists[0], "", - "warc", icons.get("warc", "?")) + if result.extractor == extractor: + extractor_items[extractor] = result - if extractor == "archive_org": - # The check for archive_org is different, so it has to be handled separately - target_path = Path(path) / "archive.org.txt" - exists = target_path.exists() - if exists: - output += '{} '.format(canon["archive_org_path"], - "archive_org", icons.get("archive_org", "?")) + for extractor, _ in EXTRACTORS: + if extractor not in exclude: + exists = extractor_items[extractor] is not None + output 
+= output_template.format(path, canon[f"{extractor}_path"], str(exists), + extractor, icons.get(extractor, "?")) + if extractor == "wget": + # warc isn't technically its own extractor, so we have to add it after wget + exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz")) + if exists: + output += output_template.format(exists[0], "", str(bool(exists)), "warc", icons.get("warc", "?")) - except Exception as e: - print(e) + if extractor == "archive_org": + # The check for archive_org is different, so it has to be handled separately + target_path = Path(path) / "archive.org.txt" + exists = target_path.exists() + output += '{} '.format(canon["archive_org_path"], str(exists), + "archive_org", icons.get("archive_org", "?")) return format_html(f'{output}') diff --git a/archivebox/themes/default/base.html b/archivebox/themes/default/base.html index f778da16..77d912d5 100644 --- a/archivebox/themes/default/base.html +++ b/archivebox/themes/default/base.html @@ -226,6 +226,7 @@ .exists-False { opacity: 0.1; + pointer-events: none; }