diff --git a/archivebox/extractors/readability.py b/archivebox/extractors/readability.py index f849d909..574dc09c 100644 --- a/archivebox/extractors/readability.py +++ b/archivebox/extractors/readability.py @@ -99,6 +99,8 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO except (Exception, OSError) as err: status = 'failed' output = err + + # prefer Chrome dom output to singlefile because singlefile often contains huge url(data:image/...base64) strings that make the html too long to parse with readability cmd = [cmd[0], './{dom,singlefile}.html'] finally: timer.end() diff --git a/archivebox/templates/admin/base.html b/archivebox/templates/admin/base.html index a29df4f5..5d4d4cc5 100644 --- a/archivebox/templates/admin/base.html +++ b/archivebox/templates/admin/base.html @@ -177,6 +177,7 @@ } {% endif %} +