From 9fdcb9857ec58aa309be74aef0e7d723f92d3527 Mon Sep 17 00:00:00 2001
From: Cristian
Date: Thu, 31 Dec 2020 13:21:40 -0500
Subject: [PATCH] refactor: remove command functional

---
 archivebox/index/__init__.py | 15 +++++++--------
 archivebox/logging_util.py   | 30 +++++++++++++++---------------
 archivebox/main.py           | 18 +++++++++---------
 3 files changed, 31 insertions(+), 32 deletions(-)

diff --git a/archivebox/index/__init__.py b/archivebox/index/__init__.py
index fb6ffe7d..aff1d36b 100644
--- a/archivebox/index/__init__.py
+++ b/archivebox/index/__init__.py
@@ -43,7 +43,6 @@ from .html import (
 )
 from .json import (
     load_json_snapshot,
-    parse_json_snapshot_details,
     write_json_snapshot_details,
 )
 from .sql import (
@@ -321,7 +320,7 @@ def load_snapshot_details(snapshot: Model, out_dir: Optional[str]=None) -> Model
     """
     out_dir = out_dir or Path(snapshot.snapshot_dir)
 
-    existing_snapshot = load_json_snapshot_details(out_dir)
+    existing_snapshot = load_json_snapshot_details(Path(out_dir))
     if existing_snapshot:
         return merge_snapshots(existing_snapshot, snapshot)
 
@@ -402,7 +401,7 @@ def get_present_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Option
         if entry.is_dir():
             snapshot = None
             try:
-                snapshot = parse_json_snapshot_details(entry.path)
+                snapshot = load_json_snapshot(Path(entry.path))
             except Exception:
                 pass
 
@@ -441,7 +440,7 @@ def get_duplicate_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Opti
             path = path.snapshot_dir
 
         try:
-            snapshot = load_json_snapshot_details(path)
+            snapshot = load_json_snapshot(Path(path))
         except Exception:
             pass
 
@@ -465,7 +464,7 @@ def get_orphaned_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optio
         if entry.is_dir():
             snapshot = None
             try:
-                snapshot = parse_json_snapshot_details(str(entry))
+                snapshot = load_json_snapshot(Path(entry))
             except Exception:
                 pass
 
@@ -492,7 +491,7 @@ def get_unrecognized_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, O
         index_exists = (entry / "index.json").exists()
         snapshot = None
         try:
-            snapshot = parse_json_snapshot_details(str(entry))
+            snapshot = load_json_snapshot(entry)
         except KeyError:
             # Try to fix index
             if index_exists:
@@ -562,13 +561,13 @@ def fix_invalid_folder_locations(out_dir: Path=OUTPUT_DIR) -> Tuple[List[str], L
         if entry.is_dir(follow_symlinks=True):
             if (Path(entry.path) / 'index.json').exists():
                 try:
-                    snapshot = parse_json_snapshot_details(entry.path)
+                    snapshot = load_json_snapshot(Path(entry.path))
                 except KeyError:
                     snapshot = None
                 if not snapshot:
                     continue
 
-                if not entry.path.endswith(f'/{link.timestamp}'):
+                if not entry.path.endswith(f'/{snapshot.timestamp}'):
                     dest = out_dir / ARCHIVE_DIR_NAME / snapshot.timestamp
                     if dest.exists():
                         cant_fix.append(entry.path)
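Note on the Path(...) wrapping introduced above: every call site that previously passed a plain str (entry.path, str(entry)) now hands load_json_snapshot a pathlib.Path. A minimal sketch of the shape this assumes for load_json_snapshot (hypothetical body; the real implementation lives in archivebox/index/json.py and is not part of this patch):

    import json
    from pathlib import Path
    from typing import Any, Optional

    def load_json_snapshot(out_dir: Path) -> Optional[Any]:
        # Sketch only: the index path is composed with the `/` operator,
        # which exists on Path but not on str -- hence Path(...) at call sites.
        index_path = out_dir / 'index.json'
        if not index_path.exists():
            return None
        with index_path.open() as f:
            return json.load(f)

Path(entry) also stays safe when entry is an os.DirEntry rather than a Path, since DirEntry implements the fspath protocol.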
diff --git a/archivebox/logging_util.py b/archivebox/logging_util.py
index f2b86735..2f564e6b 100644
--- a/archivebox/logging_util.py
+++ b/archivebox/logging_util.py
@@ -395,49 +395,49 @@ def log_list_started(filter_patterns: Optional[List[str]], filter_type: str):
     ))
     print('    {}'.format(' '.join(filter_patterns or ())))
 
-def log_list_finished(links):
-    from .index.csv import links_to_csv
+def log_list_finished(snapshots):
+    from .index.csv import snapshots_to_csv
     print()
     print('---------------------------------------------------------------------------------------------------')
-    print(links_to_csv(links, cols=['timestamp', 'is_archived', 'num_outputs', 'url'], header=True, ljust=16, separator=' | '))
+    print(snapshots_to_csv(snapshots, cols=['timestamp', 'is_archived', 'num_outputs', 'url'], header=True, ljust=16, separator=' | '))
     print('---------------------------------------------------------------------------------------------------')
     print()
 
-def log_removal_started(links: List["Link"], yes: bool, delete: bool):
-    print('{lightyellow}[i] Found {} matching URLs to remove.{reset}'.format(len(links), **ANSI))
+def log_removal_started(snapshots: List["Snapshot"], yes: bool, delete: bool):
+    print('{lightyellow}[i] Found {} matching URLs to remove.{reset}'.format(len(snapshots), **ANSI))
     if delete:
-        file_counts = [link.num_outputs for link in links if Path(link.link_dir).exists()]
+        file_counts = [snapshot.num_outputs for snapshot in snapshots if Path(snapshot.snapshot_dir).exists()]
         print(
-            f'    {len(links)} Links will be de-listed from the main index, and their archived content folders will be deleted from disk.\n'
+            f'    {len(snapshots)} Snapshots will be de-listed from the main index, and their archived content folders will be deleted from disk.\n'
             f'    ({len(file_counts)} data folders with {sum(file_counts)} archived files will be deleted!)'
         )
     else:
         print(
-            '    Matching links will be de-listed from the main index, but their archived content folders will remain in place on disk.\n'
+            '    Matching snapshots will be de-listed from the main index, but their archived content folders will remain in place on disk.\n'
             '    (Pass --delete if you also want to permanently delete the data folders)'
         )
 
     if not yes:
         print()
-        print('{lightyellow}[?] Do you want to proceed with removing these {} links?{reset}'.format(len(links), **ANSI))
+        print('{lightyellow}[?] Do you want to proceed with removing these {} snapshots?{reset}'.format(len(snapshots), **ANSI))
         try:
             assert input('    y/[n]: ').lower() == 'y'
         except (KeyboardInterrupt, EOFError, AssertionError):
             raise SystemExit(0)
 
-def log_removal_finished(all_links: int, to_remove: int):
-    if all_links == 0:
+def log_removal_finished(all_snapshots: int, to_remove: int):
+    if to_remove == 0:
         print()
-        print('{red}[X] No matching links found.{reset}'.format(**ANSI))
+        print('{red}[X] No matching snapshots found.{reset}'.format(**ANSI))
     else:
         print()
-        print('{red}[√] Removed {} out of {} links from the archive index.{reset}'.format(
+        print('{red}[√] Removed {} out of {} snapshots from the archive index.{reset}'.format(
             to_remove,
-            all_links,
+            all_snapshots,
             **ANSI,
         ))
-        print('    Index now contains {} links.'.format(all_links - to_remove))
+        print('    Index now contains {} snapshots.'.format(all_snapshots - to_remove))
 
 
 def log_shell_welcome_msg():
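The log_removal_finished change above is behavioral, not just a rename: the empty-result branch now keys off to_remove instead of the total index size, so filtering a non-empty index with zero matches prints the "[X] No matching snapshots" message instead of falling through to the removal summary. A rough standalone equivalent of the corrected branching (illustrative names, not archivebox APIs):

    def removal_summary(all_snapshots: int, to_remove: int) -> str:
        # Branch on the number of matches, not on the index size.
        if to_remove == 0:
            return '[X] No matching snapshots found.'
        return ('[√] Removed {} out of {} snapshots; index now contains {}.'
                .format(to_remove, all_snapshots, all_snapshots - to_remove))

    assert removal_summary(all_snapshots=100, to_remove=0).startswith('[X]')
    assert 'contains 97' in removal_summary(all_snapshots=100, to_remove=3)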
diff --git a/archivebox/main.py b/archivebox/main.py
index f33fabd8..5dc9484f 100644
--- a/archivebox/main.py
+++ b/archivebox/main.py
@@ -336,7 +336,7 @@ def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
     print('{green}[*] Collecting links from any existing indexes and archive folders...{reset}'.format(**ANSI))
 
     all_links = Snapshot.objects.none()
-    pending_links: Dict[str, Link] = {}
+    pending_snapshots: Dict[str, Snapshot] = {}
 
     if existing_index:
         all_snapshots = load_main_index(out_dir=out_dir, warn=False)
@@ -363,10 +363,10 @@ def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
         orphaned_data_dir_snapshots = {
             snapshot.url: snapshot
             for snapshot in parse_json_snapshot_details(out_dir)
-            if not all_snapshots.filter(url=link.url).exists()
+            if not all_snapshots.filter(url=snapshot.url).exists()
         }
         if orphaned_data_dir_snapshots:
-            pending_snapshots.update(orphaned_data_dir_links)
+            pending_snapshots.update(orphaned_data_dir_snapshots)
             print('    {lightyellow}√ Added {} orphaned snapshots from existing archive directories.{reset}'.format(len(orphaned_data_dir_snapshots), **ANSI))
 
     # Links in invalid/duplicate data dirs
@@ -383,7 +383,7 @@ def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
             print('        archivebox list --status=invalid')
 
 
-    write_main_index(list(pending_links.values()), out_dir=out_dir)
+    write_main_index(list(pending_snapshots.values()), out_dir=out_dir)
 
     print('\n{green}------------------------------------------------------------------{reset}'.format(**ANSI))
     if existing_index:
@@ -656,24 +656,24 @@ def remove(filter_str: Optional[str]=None,
         raise SystemExit(1)
 
-    log_links = [link.as_link() for link in snapshots]
-    log_list_finished(log_links)
-    log_removal_started(log_links, yes=yes, delete=delete)
+    log_list_finished(snapshots)
+    log_removal_started(snapshots, yes=yes, delete=delete)
 
     timer = TimedProgress(360, prefix='      ')
     try:
         for snapshot in snapshots:
             if delete:
-                shutil.rmtree(snapshot.as_link().link_dir, ignore_errors=True)
+                shutil.rmtree(snapshot.snapshot_dir, ignore_errors=True)
     finally:
         timer.end()
 
     to_remove = snapshots.count()
+    all_snapshots_count = load_main_index(out_dir=out_dir).count()
 
     flush_search_index(snapshots=snapshots)
     remove_from_sql_main_index(snapshots=snapshots, out_dir=out_dir)
     all_snapshots = load_main_index(out_dir=out_dir)
-    log_removal_finished(all_snapshots.count(), to_remove)
+    log_removal_finished(all_snapshots_count, to_remove)
 
     return all_snapshots
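The reordering at the end of remove() above matters for the arithmetic in log_removal_finished: the index must be counted before rows are deleted, otherwise "Removed X out of Y" reports the already-reduced total as Y. A rough standalone model of the corrected flow, with a plain list standing in for the SQL index (illustrative names, not archivebox APIs):

    def remove_flow(index: list, matching: list) -> tuple:
        to_remove = len(matching)
        all_snapshots_count = len(index)   # count BEFORE mutating the index
        for snapshot in matching:
            index.remove(snapshot)
        # these two numbers feed log_removal_finished(...)
        return all_snapshots_count, to_remove

    index = ['a', 'b', 'c', 'd']
    assert remove_flow(index, ['b', 'c']) == (4, 2)   # "Removed 2 out of 4 snapshots"
    assert len(index) == 2                            # "Index now contains 2 snapshots."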