diff --git a/archivebox/extractors/mercury.py b/archivebox/extractors/mercury.py index d9e32c0a..59cfc6f6 100644 --- a/archivebox/extractors/mercury.py +++ b/archivebox/extractors/mercury.py @@ -59,6 +59,8 @@ def save_mercury(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) status = 'succeeded' timer = TimedProgress(timeout, prefix=' ') try: + output_folder.mkdir(exist_ok=True) + # Get plain text version of article cmd = [ DEPENDENCIES['MERCURY_BINARY']['path'], @@ -71,6 +73,8 @@ def save_mercury(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) except json.JSONDecodeError: raise ShellError(cmd, result) + atomic_write(str(output_folder / "content.txt"), article_text["content"]) + # Get HTML version of article cmd = [ DEPENDENCIES['MERCURY_BINARY']['path'], @@ -82,9 +86,7 @@ def save_mercury(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) except json.JSONDecodeError: raise ShellError(cmd, result) - output_folder.mkdir(exist_ok=True) atomic_write(str(output_folder / "content.html"), article_json.pop("content")) - atomic_write(str(output_folder / "content.txt"), article_text["content"]) atomic_write(str(output_folder / "article.json"), article_json) # Check for common failure cases diff --git a/archivebox/extractors/readability.py b/archivebox/extractors/readability.py index 6e48cd9a..3e7f2069 100644 --- a/archivebox/extractors/readability.py +++ b/archivebox/extractors/readability.py @@ -112,6 +112,7 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO except (Exception, OSError) as err: status = 'failed' output = err + cmd = [cmd[0], './{singlefile,dom}.html'] finally: timer.end()