diff --git a/archivebox/extractors/dom.py b/archivebox/extractors/dom.py index babbe71c..d2582f74 100644 --- a/archivebox/extractors/dom.py +++ b/archivebox/extractors/dom.py @@ -3,6 +3,8 @@ __package__ = 'archivebox.extractors' from pathlib import Path from typing import Optional +from django.db.models import Model + from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError from ..system import run, chmod_file, atomic_write from ..util import ( @@ -20,9 +22,9 @@ from ..logging_util import TimedProgress @enforce_types -def should_save_dom(link: Link, out_dir: Optional[Path]=None) -> bool: - out_dir = out_dir or Path(link.link_dir) - if is_static_file(link.url): +def should_save_dom(snapshot: Model, out_dir: Optional[Path]=None) -> bool: + out_dir = out_dir or Path(snapshot.snapshot_dir) + if is_static_file(snapshot.url): return False if (out_dir / 'output.html').exists(): @@ -31,16 +33,16 @@ def should_save_dom(link: Link, out_dir: Optional[Path]=None) -> bool: return SAVE_DOM @enforce_types -def save_dom(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult: +def save_dom(snapshot: Model, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult: """print HTML of site to file using chrome --dump-html""" - out_dir = out_dir or Path(link.link_dir) + out_dir = out_dir or Path(snapshot.snapshot_dir) output: ArchiveOutput = 'output.html' output_path = out_dir / output cmd = [ *chrome_args(TIMEOUT=timeout), '--dump-dom', - link.url + snapshot.url ] status = 'succeeded' timer = TimedProgress(timeout, prefix=' ')