From 4484491fb77aeafe116aa5226d4c0cfd12e5de61 Mon Sep 17 00:00:00 2001 From: Cristian Date: Wed, 4 Nov 2020 11:22:55 -0500 Subject: [PATCH] feat: Create ArchiveResult after finishing an extractor process --- archivebox/core/utils.py | 6 ++---- archivebox/extractors/__init__.py | 7 +++++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/archivebox/core/utils.py b/archivebox/core/utils.py index 56c74b5c..78d0cff5 100644 --- a/archivebox/core/utils.py +++ b/archivebox/core/utils.py @@ -1,8 +1,6 @@ -from pathlib import Path - from django.utils.html import format_html -from core.models import Snapshot, ArchiveResult, EXTRACTORS +from core.models import Snapshot, EXTRACTORS def get_icons(snapshot: Snapshot) -> str: @@ -70,4 +68,4 @@ def get_icons(snapshot: Snapshot) -> str: # *link_tuple(link, 'git_path')[:2], any((out_dir / canon['git_path']).glob('*')), # canon['archive_org_path'], (out_dir / 'archive.org.txt').exists(), # ) -# \ No newline at end of file +# diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py index 60f20adf..d5d8832f 100644 --- a/archivebox/extractors/__init__.py +++ b/archivebox/extractors/__init__.py @@ -65,6 +65,10 @@ def ignore_methods(to_ignore: List[str]): def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[str]]=None, out_dir: Optional[Path]=None, skip_index: bool=False) -> Link: """download the DOM, PDF, and a screenshot into a folder named after the link's timestamp""" + # TODO: Remove when the input is changed to be a snapshot. Suboptimal approach. + from core.models import Snapshot, ArchiveResult + snapshot = Snapshot.objects.get(url=link.url) + ARCHIVE_METHODS = get_default_archive_methods() if methods: @@ -99,6 +103,9 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s stats[result.status] += 1 log_archive_method_finished(result) + ArchiveResult.objects.create(snapshot=snapshot, extractor=method_name, cmd=result.cmd, cmd_version=result.cmd_version, + output=result.output, pwd=result.pwd, start_ts=result.start_ts, end_ts=result.end_ts, status=result.status) + else: # print('{black} X {}{reset}'.format(method_name, **ANSI)) stats['skipped'] += 1