From e594e6a75a2895077029d97b88d7b6f8b580885f Mon Sep 17 00:00:00 2001 From: Cristian Date: Thu, 12 Nov 2020 10:57:31 -0500 Subject: [PATCH] feat: WARC link points to the first warc result in target path --- archivebox/core/utils.py | 8 +++++--- archivebox/themes/default/base.html | 6 +++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/archivebox/core/utils.py b/archivebox/core/utils.py index a5fa2669..67b8004d 100644 --- a/archivebox/core/utils.py +++ b/archivebox/core/utils.py @@ -1,6 +1,7 @@ from django.utils.html import format_html from core.models import Snapshot, EXTRACTORS +from pathlib import Path def get_icons(snapshot: Snapshot) -> str: @@ -34,9 +35,10 @@ def get_icons(snapshot: Snapshot) -> str: exists, extractor, icons.get(extractor, "?")) if extractor == "wget": # warc isn't technically it's own extractor, so we have to add it after wget - - output += output_template.format(path, canon["warc_path"], - exists, "warc", icons.get("warc", "?")) + exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz")) + if exists: + output += output_template.format(exists[0], "", + True, "warc", icons.get("warc", "?")) except Exception as e: print(e) diff --git a/archivebox/themes/default/base.html b/archivebox/themes/default/base.html index ed7d1be9..cacd0597 100644 --- a/archivebox/themes/default/base.html +++ b/archivebox/themes/default/base.html @@ -223,6 +223,10 @@ .title-col a { color: black; } + + .exists-False { + display: none; + } @@ -283,4 +287,4 @@ - \ No newline at end of file +