diff --git a/archivebox/archive.py b/archivebox/archive.py
index a1f6fdbb..5aacc003 100755
--- a/archivebox/archive.py
+++ b/archivebox/archive.py
@@ -28,7 +28,7 @@ from config import (
 from util import (
     check_dependencies,
-    download_url,
-    save_source,
+    save_remote_source,
+    save_stdin_source,
     pretty_path,
     migrate_data,
     check_links_structure,
@@ -204,8 +204,7 @@ if __name__ == '__main__':
     if source and any(source.startswith(s) for s in ('http://', 'https://', 'ftp://')):
-        source = download_url(source)
+        source = save_remote_source(source)
     elif stdin_raw_text:
-        source = save_source(stdin_raw_text)
-
+        source = save_stdin_source(stdin_raw_text)
 
     # Step 1: Parse the links and dedupe them with existing archive
     all_links, new_links = load_links(archive_path=out_dir, import_path=source)
@@ -213,15 +212,12 @@ if __name__ == '__main__':
 
     # Step 2: Write new index
     write_links_index(out_dir=out_dir, links=all_links)
-    # Step 3: Verify folder structure is 1:1 with index
-    # cleanup_archive(out_dir, links)
-
-    # Step 4: Run the archive methods for each link
+    # Step 3: Run the archive methods for each link
     if ONLY_NEW:
         update_archive(out_dir, new_links, source=source, resume=resume, append=True)
     else:
         update_archive(out_dir, all_links, source=source, resume=resume, append=True)
 
-    # Step 5: Re-write links index with updated titles, icons, and resources
+    # Step 4: Re-write links index with updated titles, icons, and resources
     all_links, _ = load_links(archive_path=out_dir)
     write_links_index(out_dir=out_dir, links=all_links)
diff --git a/archivebox/util.py b/archivebox/util.py
index 2a30fcb0..fcaa13b0 100644
--- a/archivebox/util.py
+++ b/archivebox/util.py
@@ -205,7 +205,7 @@ def pretty_path(path):
     return path.replace(REPO_DIR + '/', '')
 
 
-def save_source(raw_text):
+def save_stdin_source(raw_text):
     if not os.path.exists(SOURCES_DIR):
         os.makedirs(SOURCES_DIR)
 
@@ -233,7 +233,7 @@ def fetch_page_content(url, timeout=TIMEOUT):
     return resp.read().decode(encoding)
 
 
-def download_url(url, timeout=TIMEOUT):
+def save_remote_source(url, timeout=TIMEOUT):
    """download a given url's content into downloads/domain.txt"""
 
     if not os.path.exists(SOURCES_DIR):
@@ -265,7 +265,7 @@ def download_url(url, timeout=TIMEOUT):
 
     with open(source_path, 'w', encoding='utf-8') as f:
         f.write(downloaded_xml)
-
+    print(' > {}'.format(pretty_path(source_path)))
     return source_path
 
 