1
0
Fork 0
mirror of synced 2024-09-30 00:56:17 +13:00

accept methods argument to filter archive_link

This commit is contained in:
Nick Sweeting 2020-07-28 05:58:38 -04:00
parent 032c2458de
commit 2e0b751376

View file

@ -2,7 +2,7 @@ __package__ = 'archivebox.extractors'
import os import os
from typing import Optional, List from typing import Optional, List, Iterable
from datetime import datetime from datetime import datetime
from ..index.schema import Link from ..index.schema import Link
@ -34,10 +34,10 @@ from .archive_org import should_save_archive_dot_org, save_archive_dot_org
@enforce_types @enforce_types
def archive_link(link: Link, overwrite: bool=False, out_dir: Optional[str]=None) -> Link: def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[str]]=None, out_dir: Optional[str]=None) -> Link:
"""download the DOM, PDF, and a screenshot into a folder named after the link's timestamp""" """download the DOM, PDF, and a screenshot into a folder named after the link's timestamp"""
ARCHIVE_METHODS = ( ARCHIVE_METHODS = [
('title', should_save_title, save_title), ('title', should_save_title, save_title),
('favicon', should_save_favicon, save_favicon), ('favicon', should_save_favicon, save_favicon),
('wget', should_save_wget, save_wget), ('wget', should_save_wget, save_wget),
@ -47,7 +47,12 @@ def archive_link(link: Link, overwrite: bool=False, out_dir: Optional[str]=None)
('git', should_save_git, save_git), ('git', should_save_git, save_git),
('media', should_save_media, save_media), ('media', should_save_media, save_media),
('archive_org', should_save_archive_dot_org, save_archive_dot_org), ('archive_org', should_save_archive_dot_org, save_archive_dot_org),
) ]
if methods is not None:
ARCHIVE_METHODS = [
method for method in ARCHIVE_METHODS
if method[1] in methods
]
out_dir = out_dir or link.link_dir out_dir = out_dir or link.link_dir
try: try: