From 71655220ad8554458978a078e604cb2b57fa2e1c Mon Sep 17 00:00:00 2001 From: Cristian Date: Thu, 5 Nov 2020 07:54:40 -0500 Subject: [PATCH] feat: Add warc to list and limit check to succeeded archive results --- archivebox/core/utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/archivebox/core/utils.py b/archivebox/core/utils.py index 78d0cff5..1a073fa4 100644 --- a/archivebox/core/utils.py +++ b/archivebox/core/utils.py @@ -20,17 +20,21 @@ def get_icons(snapshot: Snapshot) -> str: "archive_org": "🏛", "readability": "🆁", "mercury": "🅼", + "warc": "📦" } exclude = ["favicon"] # Missing specific entry for WARC - for extractor in EXTRACTORS: - result = archive_results.filter(extractor=extractor[0]) + result = archive_results.filter(extractor=extractor[0], status="succeeded") try: if extractor[0] not in exclude: output += output_template.format(link.archive_path, canon[f"{extractor[0]}_path"], result.exists(), extractor[0], icons.get(extractor[0], "?")) + if extractor[0] == "wget": + extractor = "warc" + output += output_template.format(link.archive_path, canon[f"{extractor}_path"], + result.exists(), extractor, icons.get(extractor, "?")) except Exception as e: print(e)