From b1b6be4f13a403420a58fc8c06462c605deb16ed Mon Sep 17 00:00:00 2001 From: Aaron Fischer Date: Fri, 19 Oct 2018 22:35:08 +0200 Subject: [PATCH] merge_links() used wrong index Because merge_links() uses the index, we need to compute new_links _before_ we manipulate the index with write_links_index(). This has the negative side effect that the "Adding X new links ..." message would be printed twice (because we execute merge_links() twice). To avoid that, we only print the message when only_new is not set. --- archiver/archive.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/archiver/archive.py b/archiver/archive.py index b3384bc7..64aa0f25 100755 --- a/archiver/archive.py +++ b/archiver/archive.py @@ -64,7 +64,7 @@ def merge_links(archive_path=OUTPUT_DIR, import_path=None, only_new=False): all_links = validate_links(existing_links + all_links) num_new_links = len(all_links) - len(existing_links) - if num_new_links: + if num_new_links and not only_new: print('[{green}+{reset}] [{}] Adding {} new links from {} to {}/index.json'.format( datetime.now().strftime('%Y-%m-%d %H:%M:%S'), num_new_links, @@ -166,7 +166,8 @@ if __name__ == '__main__': # Step 1: Parse the links and dedupe them with existing archive links = merge_links(archive_path=out_dir, import_path=source, only_new=False) - + new_links = merge_links(archive_path=out_dir, import_path=source, only_new=True) + # Step 2: Write new index write_links_index(out_dir=out_dir, links=links) @@ -175,7 +176,6 @@ if __name__ == '__main__': # Step 4: Run the archive methods for each link if ONLY_NEW: - new_links = merge_links(archive_path=out_dir, import_path=source, only_new=True) update_archive(out_dir, new_links, source=source, resume=resume, append=True) else: update_archive(out_dir, links, source=source, resume=resume, append=True)