1
0
Fork 0
mirror of synced 2024-06-28 11:00:35 +12:00

refactor: screenshot uses snapshot instead of link

This commit is contained in:
Cristian 2020-12-29 12:48:26 -05:00
parent ca7f48042b
commit 6230984cb3

View file

@@ -3,6 +3,8 @@ __package__ = 'archivebox.extractors'
 from pathlib import Path
 from typing import Optional
 
+from django.db.models import Model
+
 from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
 from ..system import run, chmod_file
 from ..util import (
@@ -20,9 +22,9 @@ from ..logging_util import TimedProgress
 
 
 @enforce_types
-def should_save_screenshot(link: Link, out_dir: Optional[Path]=None) -> bool:
+def should_save_screenshot(snapshot: Model, out_dir: Optional[Path]=None) -> bool:
-    out_dir = out_dir or Path(link.link_dir)
+    out_dir = out_dir or Path(snapshot.snapshot_dir)
-    if is_static_file(link.url):
+    if is_static_file(snapshot.url):
         return False
 
     if (out_dir / "screenshot.png").exists():
@@ -31,15 +33,15 @@ def should_save_screenshot(link: Link, out_dir: Optional[Path]=None) -> bool:
     return SAVE_SCREENSHOT
 
 @enforce_types
-def save_screenshot(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
+def save_screenshot(snapshot: Model, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
     """take screenshot of site using chrome --headless"""
 
-    out_dir = out_dir or Path(link.link_dir)
+    out_dir = out_dir or Path(snapshot.snapshot_dir)
     output: ArchiveOutput = 'screenshot.png'
     cmd = [
         *chrome_args(TIMEOUT=timeout),
         '--screenshot',
-        link.url,
+        snapshot.url,
     ]
     status = 'succeeded'
     timer = TimedProgress(timeout, prefix=' ')