From a14762640e68d32dcb8aa29639fce7474c50a1d6 Mon Sep 17 00:00:00 2001 From: Cristian Date: Tue, 11 Aug 2020 07:40:55 -0500 Subject: [PATCH] feat: Avoid running readability when the target is a file --- archivebox/extractors/readability.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/archivebox/extractors/readability.py b/archivebox/extractors/readability.py index 0a3c13b3..8cac5e29 100644 --- a/archivebox/extractors/readability.py +++ b/archivebox/extractors/readability.py @@ -11,6 +11,7 @@ from ..system import run, atomic_write from ..util import ( enforce_types, download_url, + is_static_file, ) from ..config import ( @@ -26,6 +27,8 @@ from ..logging_util import TimedProgress @enforce_types def should_save_readability(link: Link, out_dir: Optional[str]=None) -> bool: out_dir = out_dir or link.link_dir + if is_static_file(link.url): + return False output = Path(out_dir or link.link_dir) / 'readability.json' return SAVE_READABILITY and (not output.exists())