From a6767671fb68e25f67edcf16afafe5234d2826dd Mon Sep 17 00:00:00 2001
From: Nick Sweeting
Date: Mon, 9 May 2022 21:21:39 -0700
Subject: [PATCH] append content of referenced files to imports

---
 archivebox/parsers/__init__.py | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/archivebox/parsers/__init__.py b/archivebox/parsers/__init__.py
index 2ce01e1a..fef1efe1 100644
--- a/archivebox/parsers/__init__.py
+++ b/archivebox/parsers/__init__.py
@@ -150,6 +150,27 @@ def run_parser_functions(to_parse: IO[str], timer, root_url: Optional[str]=None,
 def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=OUTPUT_DIR) -> str:
+    """Save raw_text, plus the text of any files it references, as a new source file.
+
+    Each whitespace-separated token of raw_text that is the path of an existing
+    regular file has that file's text appended after raw_text; read failures are
+    printed and skipped. The {ts} placeholder in filename is filled with the
+    current UTC timestamp in whole seconds. Returns the path that was written.
+    """
     ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0]
     source_path = str(out_dir / SOURCES_DIR_NAME / filename.format(ts=ts))
-    atomic_write(source_path, raw_text)
+
+    referenced_texts = []
+
+    # NOTE(review): this reads any local file named in raw_text; confirm that
+    # raw_text never comes from an untrusted submitter.
+    for entry in raw_text.split():
+        try:
+            # Call is_file(): a bare `Path.exists` attribute is always truthy and
+            # would send every token (URLs included) to read_text().
+            if Path(entry).is_file():
+                referenced_texts.append(Path(entry).read_text())
+        except (OSError, ValueError) as err:
+            print(err)
+
+    atomic_write(source_path, raw_text + '\n' + ''.join(referenced_texts))
     log_source_saved(source_file=source_path)
     return source_path