diff --git a/archivebox/extractors/git.py b/archivebox/extractors/git.py index fd20d4b6..a16e5bbc 100644 --- a/archivebox/extractors/git.py +++ b/archivebox/extractors/git.py @@ -4,6 +4,8 @@ __package__ = 'archivebox.extractors' from pathlib import Path from typing import Optional +from django.db.models import Model + from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError from ..system import run, chmod_file from ..util import ( @@ -28,17 +30,17 @@ from ..logging_util import TimedProgress @enforce_types -def should_save_git(link: Link, out_dir: Optional[Path]=None) -> bool: - out_dir = out_dir or link.link_dir - if is_static_file(link.url): +def should_save_git(snapshot: Model, out_dir: Optional[Path]=None) -> bool: + out_dir = out_dir or snapshot.snapshot_dir + if is_static_file(snapshot.url): return False if (out_dir / "git").exists(): return False is_clonable_url = ( - (domain(link.url) in GIT_DOMAINS) - or (extension(link.url) == 'git') + (domain(snapshot.url) in GIT_DOMAINS) + or (extension(snapshot.url) == 'git') ) if not is_clonable_url: return False @@ -47,10 +49,10 @@ def should_save_git(link: Link, out_dir: Optional[Path]=None) -> bool: @enforce_types -def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult: +def save_git(snapshot: Model, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult: """download full site using git""" - out_dir = out_dir or Path(link.link_dir) + out_dir = out_dir or Path(snapshot.snapshot_dir) output: ArchiveOutput = 'git' output_path = out_dir / output output_path.mkdir(exist_ok=True) @@ -59,7 +61,7 @@ def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> 'clone', *GIT_ARGS, *([] if CHECK_SSL_VALIDITY else ['-c', 'http.sslVerify=false']), - without_query(without_fragment(link.url)), + without_query(without_fragment(snapshot.url)), ] status = 'succeeded' timer = TimedProgress(timeout, prefix=' ')