From de3c82730cc7e7cc02ddb2d923acd265567b5517 Mon Sep 17 00:00:00 2001 From: Cristian Date: Tue, 29 Dec 2020 14:11:31 -0500 Subject: [PATCH] refactor: archive_org uses snapshot instead of link --- archivebox/extractors/archive_org.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/archivebox/extractors/archive_org.py b/archivebox/extractors/archive_org.py index f5598d6f..f88cdc9f 100644 --- a/archivebox/extractors/archive_org.py +++ b/archivebox/extractors/archive_org.py @@ -5,6 +5,8 @@ from pathlib import Path from typing import Optional, List, Dict, Tuple from collections import defaultdict +from django.db.models import Model + from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError from ..system import run, chmod_file from ..util import ( @@ -25,9 +27,9 @@ from ..logging_util import TimedProgress @enforce_types -def should_save_archive_dot_org(link: Link, out_dir: Optional[Path]=None) -> bool: - out_dir = out_dir or Path(link.link_dir) - if is_static_file(link.url): +def should_save_archive_dot_org(snapshot: Model, out_dir: Optional[Path]=None) -> bool: + out_dir = out_dir or Path(snapshot.snapshot_dir) + if is_static_file(snapshot.url): return False if (out_dir / "archive.org.txt").exists(): @@ -37,13 +39,13 @@ def should_save_archive_dot_org(link: Link, out_dir: Optional[Path]=None) -> boo return SAVE_ARCHIVE_DOT_ORG @enforce_types -def save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult: +def save_archive_dot_org(snapshot: Model, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult: """submit site to archive.org for archiving via their service, save returned archive url""" - out_dir = out_dir or Path(link.link_dir) + out_dir = out_dir or Path(snapshot.snapshot_dir) output: ArchiveOutput = 'archive.org.txt' archive_org_url = None - submit_url = 'https://web.archive.org/save/{}'.format(link.url) + submit_url = 'https://web.archive.org/save/{}'.format(snapshot.url) cmd = [ CURL_BINARY, *CURL_ARGS,